@article{Hubbard2019,
abstract = {Recent efforts by the American Statistical Association to improve statistical practice, especially in countering the misuse and abuse of null hypothesis significance testing (NHST) and p-values, are to be welcomed. But will they be successful? The present study offers compelling evidence that this will be an extraordinarily difficult task. Dramatic citation-count data on 25 articles and books severely critical of NHST's negative impact on good science, underlining that this issue was/is well known, did nothing to stem its usage over the period 1960–2007. On the contrary, employment of NHST increased during this time. To be successful in this endeavor, as well as restoring the relevance of the statistics profession to the scientific community in the 21st century, the ASA must be prepared to dispense detailed advice. This includes specifying those situations, if they can be identified, in which the p-value plays a clearly valuable role in data analysis and interpretation. The ASA might also consider a statement that recommends abandoning the use of p-values.},
author = {Hubbard, R.},
doi = {10.1080/00031305.2018.1497540},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{Will the ASA's Efforts to Improve Statistical Practice be Successful? Some Evidence to the Contrary}},
volume = {73},
year = {2019}
}
@article{Pogrow2019,
abstract = {Relying on effect size as a measure of practical significance is turning out to be just as misleading as using p-values to determine the effectiveness of interventions for improving clinical practice in complex organizations such as schools. This article explains how effect sizes have misdirected practice in education and other disciplines. Even when effect size is incorporated into RCT research the recommendations of whether interventions are effective are misleading and generally useless to practitioners. As a result, a new criterion of practical benefit is recommended for evaluating research findings about the effectiveness of interventions in complex organizations where benchmarks of existing performance exist. Practical benefit exists when the unadjusted performance of an experimental group provides a noticeable advantage over an existing benchmark. Some basic principles for determining practical benefit are provided. Practical benefit is more intuitive and is expected to enable leaders to make more accurate assessments as to whether published research findings are likely to produce noticeable improvements in their organizations. In addition, practical benefit is used routinely as the research criterion for the alternative scientific methodology of improvement science that has an established track record of being a more efficient way to develop new interventions that improve practice dramatically than RCT research. Finally, the problems with practical significance suggest that the research community should seek different inferential methods for research designed to improve clinical performance in complex organizations, as compared to methods for testing theories and medicines.},
author = {Pogrow, S.},
doi = {10.1080/00031305.2018.1549101},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{How Effect Size (Practical Significance) Misleads Clinical Practice: The Case for Switching to Practical Benefit to Assess Applied Research Findings}},
volume = {73},
year = {2019}
}
@article{Gannon2019,
abstract = {This article argues that researchers do not need to completely abandon the p-value, the best-known significance index, but should instead stop using significance levels that do not depend on sample sizes. A testing procedure is developed using a mixture of frequentist and Bayesian tools, with a significance level that is a function of sample size, obtained from a generalized form of the Neyman–Pearson Lemma that minimizes a linear combination of $\alpha$, the probability of rejecting a true null hypothesis, and $\beta$, the probability of failing to reject a false null, instead of fixing $\alpha$ and minimizing $\beta$. The resulting hypothesis tests do not violate the Likelihood Principle and do not require any constraints on the dimensionalities of the sample space and parameter space. The procedure includes an ordering of the entire sample space and uses predictive probability (density) functions, allowing for testing of both simple and compound hypotheses. Accessible examples are presented to highlight specific characteristics of the new tests.},
author = {Gannon, M. and de Bragan{\c{c}}a Pereira, C. and Polpo, A.},
doi = {10.1080/00031305.2018.1518268},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{Blending Bayesian and Classical Tools to Define Optimal Sample-Size-Dependent Significance Levels}},
volume = {73},
year = {2019}
}
@article{Errington2019,
abstract = {Most scientific research is conducted by small teams of investigators who together formulate hypotheses, collect data, conduct analyses, and report novel findings. These teams operate independently as vertically integrated silos. Here we argue that scientific research that is horizontally distributed can provide substantial complementary value, aiming to maximize available resources, promote inclusiveness and transparency, and increase rigor and reliability. This alternative approach enables researchers to tackle ambitious projects that would not be possible under the standard model. Crowdsourced scientific initiatives vary in the degree of communication between project members from largely independent work curated by a coordination team to crowd collaboration on shared activities. The potential benefits and challenges of large-scale collaboration span the entire research process: ideation, study design, data collection, data analysis, reporting, and peer review. Complementing traditional small science with crowdsourced approaches can accelerate the progress of science and improve the quality of scientific research.},
author = {Errington, T. and Lai, C. and Silberzahn, R. and Uhlmann, E. and Kidwell, M. and Chartier, C. and Riegelman, A. and Nosek, B. and McCarthy, R. and Ebersole, C.},
doi = {10.1177/1745691619850561},
issn = {17456916},
journal = {Perspectives on Psychological Science},
number = {5},
publisher = {SAGE},
title = {{Scientific Utopia III: Crowdsourcing Science}},
volume = {14},
year = {2019}
}
@article{Calin-Jageman2019,
abstract = {The “New Statistics” emphasizes effect sizes, confidence intervals, meta-analysis, and the use of Open Science practices. We present three specific ways in which a New Statistics approach can help improve scientific practice: by reducing overconfidence in small samples, by reducing confirmation bias, and by fostering more cautious judgments of consistency. We illustrate these points through consideration of the literature on oxytocin and human trust, a research area that typifies some of the endemic problems that arise with poor statistical practice.},
author = {Calin-Jageman, R. and Cumming, G.},
doi = {10.1080/00031305.2018.1518266},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{The New Statistics for Better Science: Ask How Much, How Uncertain, and What Else Is Known}},
volume = {73},
year = {2019}
}
@article{Lee2018,
abstract = {Conducting scientific inquiry is expected to help students make informed decisions; however, how exactly it can help is rarely explained in science education standards. According to classroom studies, inquiry that students conduct in science classes seems to have little effect on their decision-making. Predetermined values play a large role in students' decision-making, but students do not explore these values or evaluate whether they are appropriate to the particular issue they are deciding, and they often ignore relevant scientific information. We explore how to connect inquiry and values, and how this connection can contribute to informed decision-making based on John Dewey's philosophy. Dewey argues that scientific inquiry should include value judgments and that conducting inquiry can improve the ability to make good value judgments. Value judgment is essential to informed, rational decision-making, and Dewey's ideas can explain how conducting inquiry can contribute to make an informed decision through value judgment. According to Dewey, each value judgment during inquiry is a practical judgment guiding action, and students can improve their value judgments by evaluating their actions during scientific inquiry. Thus, we suggest that students need an opportunity to explore values through scientific inquiry and that practicing value judgment will help informed decision-makings.},
author = {Lee, Eun Ah and Brown, Matthew J.},
doi = {10.1007/s11191-017-9952-9},
issn = {15731901},
journal = {Science and Education},
keywords = {History,Philosophy of Science,Science Education,general},
month = {mar},
number = {1-2},
pages = {63--79},
publisher = {Springer Netherlands},
title = {{Connecting Inquiry and Values in Science Education: An Approach Based on John Dewey's Philosophy}},
url = {https://doi.org/10.1007/s11191-017-9952-9},
volume = {27},
year = {2018}
}
@article{Day2019,
abstract = {In this paper, we consider some of the ways in which personal and professional values shape the ways in which criminal justice professionals practice. Using the example of offender rehabilitation, we suggest that it is the values that we hold that determine how knowledge about what works is defined and how priorities for professional engagement are set. Specifically, we argue that there has been too great an emphasis on a particular set of epistemic values and insufficient attention paid to the importance of prudential and moral or social cultural values in the way in which rehabilitation services have developed. This has resulted in practices that have largely failed to deliver the types of social benefit that are intended, supported by a policy paradigm that privileges particular approaches. The argument is illustrated with reference to the failure to deliver effective rehabilitation to Indigenous peoples in both Australia and New Zealand. (PsycINFO Database Record (c) 2018 APA, all rights reserved)},
author = {Day, Andrew and Tamatea, Armon and Geia, Lynore},
doi = {10.1080/1068316x.2018.1543422},
issn = {1068-316X},
journal = {Psychology, Crime {\&} Law},
keywords = {Accreditation,best practice,community,indigenous,knowledge,offender rehabilitation},
month = {jul},
number = {6},
pages = {577--588},
publisher = {Informa UK Limited},
title = {{Scientific inquiry and offender rehabilitation: the importance of epistemic and prudential values}},
url = {https://www.tandfonline.com/doi/abs/10.1080/1068316X.2018.1543422},
volume = {25},
year = {2019}
}
@book{Longino1990,
author = {Longino, Helen E},
isbn = {0691020515},
publisher = {Princeton University Press},
title = {{Science as social knowledge: Values and objectivity in scientific inquiry}},
year = {1990}
}
@misc{AmericanPsychologicalAssociation2017,
author = {{American Psychological Association}},
booktitle = {American Psychological Association},
title = {{Ethical principles of psychologists and code of conduct}},
url = {https://www.apa.org/ethics/code},
urldate = {2020-08-17},
year = {2017}
}
@article{Hardwicke2018,
abstract = {Registered reports present a substantial departure from traditional publishing models with the goal of enhancing the transparency and credibility of the scientific literature. We map the evolving universe of registered reports to assess their growth, implementation and shortcomings at journals across scientific disciplines.},
author = {Hardwicke, Tom E. and Ioannidis, John P.A.},
journal = {Nature Human Behaviour},
doi = {10.1038/s41562-018-0444-y},
issn = {23973374},
keywords = {Human behaviour,Peer review,Publishing},
month = {nov},
number = {11},
pages = {793--796},
pmid = {31558810},
publisher = {Nature Publishing Group},
title = {{Mapping the universe of registered reports}},
url = {https://www.nature.com/articles/s41562-018-0444-y},
volume = {2},
year = {2018}
}
@article{Gabry2019,
abstract = {Bayesian data analysis is about more than just computing a posterior distribution, and Bayesian visualization is about more than trace plots of Markov chains. Practical Bayesian data analysis, like all data analysis, is an iterative process of model building, inference, model checking and evaluation, and model expansion. Visualization is helpful in each of these stages of the Bayesian workflow and it is indispensable when drawing inferences from the types of modern, high dimensional models that are used by applied researchers.},
archivePrefix = {arXiv},
arxivId = {1709.01449},
author = {Gabry, Jonah and Simpson, Daniel and Vehtari, Aki and Betancourt, Michael and Gelman, Andrew},
doi = {10.1111/rssa.12378},
eprint = {1709.01449},
issn = {1467985X},
journal = {Journal of the Royal Statistical Society. Series A: Statistics in Society},
number = {2},
title = {{Visualization in Bayesian workflow}},
volume = {182},
year = {2019}
}
@article{Hardwicke2020,
abstract = {While some scientists study insects, molecules, brains, or clouds, other scientists study science itself. Meta-research, or research-on-research, is a burgeoning discipline that investigates efficiency, quality, and bias in the scientific ecosystem, topics that have become especially relevant amid widespread concerns about the credibility of the scientific literature. Meta-research may help calibrate the scientific ecosystem toward higher standards by providing empirical evidence that informs the iterative generation and refinement of reform initiatives. We introduce a translational framework that involves (a) identifying problems, (b) investigating problems, (c) developing solutions, and (d) evaluating solutions. In each of these areas, we review key meta-research endeavors and discuss several examples of prior and ongoing work. The scientific ecosystem is perpetually evolving; the discipline of meta-research presents an opportunity to use empirical evidence to guide its development and maximize its potential.},
author = {Hardwicke, Tom E. and Serghiou, Stylianos and Janiaud, Perrine and Danchev, Valentin and Cr{\"{u}}well, Sophia and Goodman, Steven N. and Ioannidis, John P.A.},
journal = {Annual Review of Statistics and Its Application},
doi = {10.1146/annurev-statistics-031219-041104},
issn = {2326831X},
title = {{Calibrating the scientific ecosystem through meta-research}},
volume = {7},
year = {2020}
}
@article{Szollosi2020,
author = {Szollosi, Aba and Kellen, David and Navarro, Danielle J. and Shiffrin, Richard and van Rooij, Iris and {Van Zandt}, Trisha and Donkin, Chris},
doi = {10.1016/j.tics.2019.11.009},
issn = {1879307X},
journal = {Trends in Cognitive Sciences},
keywords = {inference,preregistration,theory development},
month = {feb},
number = {2},
pages = {94--95},
pmid = {31892461},
publisher = {Elsevier Ltd},
title = {{Is Preregistration Worthwhile?}},
url = {https://www.cell.com/trends/cognitive-sciences/abstract/S1364-6613(19)30285-2},
volume = {24},
year = {2020}
}
@article{Nosek2012,
author = {Nosek, Brian A and Spies, Jeffrey R and Motyl, Matt},
issn = {1745-6916},
journal = {Perspectives on Psychological Science},
number = {6},
pages = {615--631},
publisher = {SAGE Publications},
title = {{Scientific utopia: II. Restructuring incentives and practices to promote truth over publishability}},
volume = {7},
year = {2012}
}
@book{Douglas2009,
abstract = {The role of science in policymaking has gained unprecedented stature in the United States, raising questions about the place of science and scientific expertise in the democratic process. Some scientists have been given considerable epistemic authority in shaping policy on issues of great moral and cultural significance, and the politicizing of these issues has become highly contentious. Since World War II, most philosophers of science have purported the concept that science should be "value-free." InScience, Policy and the Value-Free Ideal,Heather E. Douglas argues that such an ideal is neither adequate nor desirable for science. She contends that the moral responsibilities of scientists require the consideration of values even at the heart of science. She lobbies for a new ideal in which values serve an essential function throughout scientific inquiry, but where the role values play is constrained at key points, thus protecting the integrity and objectivity of science. In this vein, Douglas outlines a system for the application of values to guide scientists through points of uncertainty fraught with moral valence. Following a philosophical analysis of the historical background of science advising and the value-free ideal, Douglas defines how values should-and should not-function in science. She discusses the distinctive direct and indirect roles for values in reasoning, and outlines seven senses of objectivity, showing how each can be employed to determine the reliability of scientific claims. Douglas then uses these philosophical insights to clarify the distinction between junk science and sound science to be used in policymaking. In conclusion, she calls for greater openness on the values utilized in policymaking, and more public participation in the policymaking process, by suggesting various models for effective use of both the public and experts in key risk assessments.},
author = {Douglas, Heather},
doi = {10.2307/j.ctt6wrc78},
isbn = {9780822960263},
month = {jul},
publisher = {University of Pittsburgh Press},
title = {{Science, Policy, and the Value-Free Ideal}},
url = {http://www.jstor.org/stable/j.ctt6wrc78},
year = {2009}
}
@incollection{sep-scientific-objectivity,
author = {Reiss, Julian and Sprenger, Jan},
booktitle = {The Stanford Encyclopedia of Philosophy},
edition = {Winter 2017},
editor = {Zalta, Edward N},
howpublished = {\url{https://plato.stanford.edu/archives/win2017/entries/scientific-objectivity/}},
publisher = {Metaphysics Research Lab, Stanford University},
title = {{Scientific Objectivity}},
year = {2017}
}
@article{Williams2006,
abstract = {Objectivity and value freedom have often been conflated in the philosophical and sociological literature. While value freedom construed as an absence of social and moral values in scientific work has been discredited, defenders of value freedom bracket off methodological values or practices from social and moral ones. In this paper I will first show how values exist along a continuum and argue that science is and should be value based. One of these values is necessarily objectivity for science to be possible. However the version of objectivity I will describe is socially situated in methodological practice, but also crucially in the particular purpose of a given science. Objectivity (or its absence) may be transferred vertically from practices, goals, or discourses outside science through several levels to that of the daytoday activities of the scientist. It is also possible for this transfer to occur in the other direction and indeed objectivity can be situated in extrascientific practices and discourses. Objectivity (or its absence) may also be transferred horizontally within particular methodological practice to other disciplines or parts of a discipline. Ultimately a socially situated objectivity is an achievement of the community of science. I will use some brief contemporary and historical illustrations from science and the intersection of science and public policy to show how objectivity has been achieved or failed.},
author = {Williams, Malcolm},
journal = {Social Epistemology},
doi = {10.1080/02691720600807468},
issn = {02691728},
keywords = {Objectivity,Realism,Scientific Method,Truth,Value Freedom},
month = {apr},
number = {2},
pages = {163--180},
publisher = {Routledge},
title = {{Can scientists be objective?}},
url = {https://www.tandfonline.com/doi/abs/10.1080/02691720600807468},
volume = {20},
year = {2006}
}
@article{Lakens2017a,
abstract = {Scientists should be able to provide support for the absence of a meaningful effect. Currently, researchers often incorrectly conclude an effect is absent based a nonsignificant result. A widely recommended approach within a frequentist framework is to test for equivalence. In equivalence tests, such as the two one-sided tests (TOST) procedure discussed in this article, an upper and lower equivalence bound is specified based on the smallest effect size of interest. The TOST procedure can be used to statistically reject the presence of effects large enough to be considered worthwhile. This practical primer with accompanying spreadsheet and R package enables psychologists to easily perform equivalence tests (and power analyses) by setting equivalence bounds based on standardized effect sizes and provides recommendations to prespecify equivalence bounds. Extending your statistical tool kit with equivalence tests is an easy way to improve your statistical and theoretical inferences.},
author = {Lakens, Dani{\"{e}}l},
doi = {10.1177/1948550617697177},
issn = {19485514},
journal = {Social Psychological and Personality Science},
number = {4},
pmid = {28736600},
title = {{Equivalence Tests: A Practical Primer for t Tests, Correlations, and Meta-Analyses}},
volume = {8},
year = {2017}
}
@article{Kuncel2013,
abstract = {In employee selection and academic admission decisions, holistic (clinical) data combination methods continue to be relied upon and preferred by practitioners in our field. This meta-analysis examined and compared the relative predictive power of mechanical methods versus holistic methods in predicting multiple work (advancement, supervisory ratings of performance, and training performance) and academic (grade point average) criteria. There was consistent and substantial loss of validity when data were combined holistically-even by experts who are knowledgeable about the jobs and organizations in question-across multiple criteria in work and academic settings. In predicting job performance, the difference between the validity of mechanical and holistic data combination methods translated into an improvement in prediction of more than 50{\%}. Implications for evidence-based practice are discussed. {\textcopyright} 2013 American Psychological Association.},
author = {Kuncel, Nathan R. and Klieger, David M. and Connelly, Brian S. and Ones, Deniz S.},
doi = {10.1037/a0034156},
issn = {00219010},
journal = {Journal of Applied Psychology},
keywords = {Criterion related validity,Judgment and decision making,Mechanical versus clinical data combination},
number = {6},
pages = {1060--1072},
title = {{Mechanical versus clinical data combination in selection and admissions decisions: A meta-analysis}},
volume = {98},
year = {2013}
}
@article{Claesen2019,
author = {Claesen, A. and Gomes, S. and Tuerlinckx, F. and Vanpaemel, W.},
doi = {10.31234/osf.io/d8wex},
journal = {PsyArXiv},
title = {{Preregistration: Comparing Dream to Reality}},
url = {https://psyarxiv.com/d8wex/},
year = {2019}
}
@article{Kozyreva2020,
abstract = {Abstract: The Internet has evolved into a ubiquitous digital environment in which people communicate, seek information, and make decisions. Online environments are replete with smart, highly adaptive choice architectures designed primarily to maximize commercial interests, capture and sustain users' attention, monetize user data, and predict and influence future behavior. This online landscape holds multiple negative consequences for society, such as a decline in human autonomy, rising incivility in online conversation, the facilitation of political extremism, and the spread of disinformation. Benevolent choice architects working with regulators may curb the worst excesses of manipulative choice architectures, yet the strategic advantages, resources, and data remain with commercial players. One way to address this imbalance is with interventions that empower Internet users to gain some control over their digital environments, in part by boosting their information literacy and their cognitive resistance to manipulation. Our goal is to present a conceptual map of interventions that are based on insights from psychological science. We begin by systematically outlining how online and offline environments differ despite being increasingly inextricable. We then identify four major types of challenges that users encounter in online environments: persuasive and manipulative choice architectures, AI-assisted information architectures, distractive environments, and false and misleading information. Next, we turn to how psychological science can inform interventions to counteract these challenges of the digital world. After distinguishing between three types of behavioral and cognitive interventions— nudges, technocognition, and boosts—we focus in on boosts, of which we identify two main groups: (1) those aimed at enhancing people's agency in their digital environments (e.g., self- nudging, deliberate ignorance) and (2) those aimed at boosting competences of reasoning and resilience to manipulation (e.g., simple decision aids, inoculation). These cognitive tools are designed to foster the civility of online discourse and protect reason and human autonomy against manipulative choice architectures, attention-grabbing techniques, and the spread of false information.},
author = {Kozyreva, A. and Lewandowsky, S. and Hertwig, R.},
doi = {10.31234/OSF.IO},
journal = {PsyArXiv},
keywords = {Meta,science},
pages = {1--126},
publisher = {PsyArXiv},
title = {{Citizens Versus the Internet: Confronting Digital Challenges With Cognitive Tools}},
url = {https://psyarxiv.com/ky4x8/},
year = {2020}
}
@article{Adam2019,
author = {Adam, David},
doi = {10.1126/science.aay1207},
issn = {0036-8075},
journal = {Science},
month = {may},
publisher = {American Association for the Advancement of Science (AAAS)},
title = {{A solution to psychology's reproducibility problem just failed its first test}},
year = {2019}
}
@article{Yamada2018,
abstract = {The reproducibility problem that exists in various academic fields has been discussed in recent years, and it has been revealed that scientists discreetly engage in several questionable research practices (QRPs). For example, the practice of hypothesizing after the results are known (HARKing) involves the reconstruction of hypotheses and stories after results have been obtained (Kerr, 1998) and thereby promotes the retrospective fabrication of favorable hypotheses (cf. Bem, 2004). P-hacking encompasses various untruthful manipulations for obtaining p-values less than 0.05 (Simmons et al., 2011). Such unethical practices dramatically increase the number of false positive findings and thereby encourage the intentional fabrication of evidence as the basis of scientific knowledge and theory, which leads to individual profits for researchers.},
annote = {Not all that insightful. Though it did have some ideas about how you can circumvent the limitations of pre-registration and make it look as if you pre-registered well.},
author = {Yamada, Yuki},
doi = {10.3389/fpsyg.2018.01831},
issn = {1664-1078},
journal = {Frontiers in Psychology},
keywords = {Academic publishing,Misconduct in research,Open science,Preregistration,QRP,pre-registration,pre-registration, criticisms},
month = {sep},
pages = {1831},
publisher = {Frontiers Media S.A.},
title = {{How to Crack Pre-registration: Toward Transparent and Open Science}},
url = {https://www.frontiersin.org/article/10.3389/fpsyg.2018.01831/full},
volume = {9},
year = {2018}
}
@article{Glass1976,
author = {Glass, Gene V},
issn = {0013-189X},
journal = {Educational Researcher},
number = {10},
pages = {3--8},
publisher = {SAGE Publications},
title = {{Primary, secondary, and meta-analysis of research}},
volume = {5},
year = {1976}
}
@article{Browne2000,
abstract = {This paper gives a review of cross-validation methods. The original applications in multiple linear regression are considered first. It is shown how predictive accuracy depends on sample size and the number of predictor variables. Both two-sample and single-sample cross-validation indices are investigated. The application of cross-validation methods to the analysis of moment structures is then justified. An equivalence of a single-sample cross-validation index and the Akaike information criterion is pointed out. It is seen that the optimal number of parameters suggested by both single-sample and two-sample cross-validation indices will depend on sample size. {\textcopyright} 2000 Academic Press.},
author = {Browne, Michael W.},
doi = {10.1006/jmps.1999.1279},
issn = {00222496},
journal = {Journal of Mathematical Psychology},
month = {mar},
number = {1},
pages = {108--132},
publisher = {Academic Press},
title = {{Cross-validation methods}},
volume = {44},
year = {2000}
}
@article{Kerr1998,
abstract = {This article considers a practice in scientific communication termed HARKing (Hypothesizing After the Results are Known). HARKing is defined as presenting a post hoc hypothesis (i.e., one based on or informed by one's results) in one's research report as if it were, in fact, an a priori hypotheses. Several forms of HARKing are identified and survey data are presented that suggests that at least some forms of HARKing are widely practiced and widely seen as inappropriate. I identify several reasons why scientists might HARK. Then I discuss several reasons why scientists ought not to HARK. It is conceded that the question of whether HARKing's costs exceed its benefits is a complex one that ought to be addressed through research, open discussion, and debate. To help stimulate such discussion (and for those such as myself who suspect that HARKing's costs do exceed its benefits), I conclude the article with some suggestions for deterring HARKing. Copyright {\textcopyright} 1998 by Lawrence Erlbaum Associates, Inc.},
author = {Kerr, Norbert L.},
doi = {10.1207/s15327957pspr0203_4},
issn = {10888683},
journal = {Personality and Social Psychology Review},
number = {3},
pages = {196--217},
publisher = {SAGE Publications Inc.},
title = {{HARKing: Hypothesizing after the results are known}},
url = {https://pubmed.ncbi.nlm.nih.gov/15647155/},
volume = {2},
year = {1998}
}
@article{Cribbie2017,
abstract = {Researchers in psychology are frequently confronted with the issue of analysing multiple relationships simultaneously. For example, this could involve multiple outcome variables or multiple predictors in a regression framework. Current recommendations typically steer researchers toward familywise or false-discovery rate Type I error control to limit the probability of incorrectly rejecting the null hypothesis. Stepwise modified-Bonferroni procedures are suggested for following this recommendation. However, longstanding arguments against multiplicity control combined with a modern distaste for null hypothesis significance testing have warranted revisiting this debate. This paper is an exploration of both sides of the multiplicity control debate, with the goal of educating concerned parties regarding best practices for conducting multiple related tests.},
author = {Cribbie, Robert A.},
doi = {10.1037/cbs0000075},
issn = {18792669},
journal = {Canadian Journal of Behavioural Science},
keywords = {Bonferroni,effect sizes,estimation,multiplicity control,null hypothesis significance testing},
month = {jul},
number = {3},
pages = {159--165},
publisher = {American Psychological Association Inc.},
title = {{Multiplicity Control, School Uniforms, and Other Perplexing Debates}},
url = {/record/2017-29525-002},
volume = {49},
year = {2017}
}
@article{VanderLinden2017,
author = {van der Linden, Sander and Chryst, Breanne},
doi = {10.3389/fams.2017.00012},
issn = {2297-4687},
journal = {Frontiers in Applied Mathematics and Statistics},
keywords = {Bayesian evidence synthesis,meta-analysis,reproducibility,social norms},
month = {jun},
pages = {12},
publisher = {Frontiers Media SA},
title = {{No Need for Bayes Factors: A Fully Bayesian Evidence Synthesis}},
url = {http://journal.frontiersin.org/article/10.3389/fams.2017.00012/full},
volume = {3},
year = {2017}
}
@article{Bollen2019a,
abstract = {Structural misspecifications in factor analysis include using the wrong number of factors and omitting cross loadings or correlated errors. The impact of these errors on factor loading estimates is understudied. Factor loadings underlie our assessments of the validity and reliability of indicators. Thus knowing how structural misspecifications affect a factor loading is a key issue. This paper develops analytic conditions of when misspecifications affect Bollen's (1996) model implied instrumental variable, two stage least squares (MIIV-2SLS) estimator of a factor loading. It shows that if an indicator equation is correctly specified, then correlated errors among other measures, mixing up causal indicators with reflective, omitting cross loadings, and omitting direct effects between indicators leave the MIIV-2SLS estimator of the factor loading unchanged. Alternatively, if the indicator or the scaling indicator equation is misspecified, then the loading is unlikely to be robust. The results are illustrated with hypothetical and empirical examples.},
author = {Bollen, Kenneth A.},
doi = {10.1080/10705511.2019.1691005},
issn = {15328007},
journal = {Structural Equation Modeling},
keywords = {Model implied instrumental variables,factor analysis,misspecified models,two stage least squares},
publisher = {Routledge},
title = {{When Good Loadings Go Bad: Robustness in Factor Analysis}},
url = {https://www.tandfonline.com/doi/abs/10.1080/10705511.2019.1691005},
year = {2019}
}
@article{Gelman2018a,
abstract = {A standard mode of inference in social and behavioral science is to establish stylized facts using statistical significance in quantitative studies. However, in a world in which measurements are noisy and effects are small, this will not work: selection on statistical significance leads to effect sizes which are overestimated and often in the wrong direction. After a brief discussion of two examples, one in economics and one in social psychology, we consider the procedural solution of open postpublication review, the design solution of devoting more effort to accurate measurements and within-person comparisons, and the statistical analysis solution of multilevel modeling and reporting all results rather than selection on significance. We argue that the current replication crisis in science arises in part from the ill effects of null hypothesis significance testing being used to study small effects with noisy data. In such settings, apparent success comes easy but truly replicable results require a more serious connection between theory, measurement, and data.},
author = {Gelman, Andrew},
doi = {10.1177/0146167217729162},
issn = {15527433},
journal = {Personality and Social Psychology Bulletin},
number = {1},
title = {{The Failure of Null Hypothesis Significance Testing When Studying Incremental Changes, and What to Do About It}},
volume = {44},
year = {2018}
}
@article{Banks2016,
abstract = {Purpose: Questionable research or reporting practices (QRPs) contribute to a growing concern regarding the credibility of research in the organizational sciences and related fields. Such practices include design, analytic, or reporting practices that may introduce biased evidence, which can have harmful implications for evidence-based practice, theory development, and perceptions of the rigor of science. Design/Methodology/Approach: To assess the extent to which QRPs are actually a concern, we conducted a systematic review to consider the evidence on QRPs. Using a triangulation approach (e.g., by reviewing data from observations, sensitivity analyses, and surveys), we identified the good, the bad, and the ugly. Findings: Of the 64 studies that fit our criteria, 6 appeared to find little to no evidence of engagement in QRPs and the other 58 found more severe evidence (91 {\%}). Implications: Drawing upon the findings, we provide recommendations for future research related to publication practices and academic training. Originality/value: We report findings from studies that suggest that QRPs are not a problem, that QRPs are used at a suboptimal rate, and that QRPs present a threat to the viability of organizational science research.},
author = {Banks, George C. and Rogelberg, Steven G. and Woznyj, Haley M. and Landis, Ronald S. and Rupp, Deborah E.},
journal = {Journal of Business and Psychology},
doi = {10.1007/s10869-016-9456-7},
issn = {08893268},
number = {3},
title = {{Editorial: Evidence on Questionable Research Practices: The Good, the Bad, and the Ugly}},
volume = {31},
year = {2016}
}
@article{Marsman2017a,
abstract = {We applied three Bayesian methods to reanalyse the preregistered contributions to the Social Psychology special issue 'Replications of Important Results in Social Psychology' (Nosek {\&} Lakens. 2014 Registered reports: a method to increase the credibility of published results. Soc. Psychol. 45, 137–141. (doi:10.1027/1864-9335/a000192)). First, individual-experiment Bayesian parameter estimation revealed that for directed effect size measures, only three out of 44 central 95{\%} credible intervals did not overlap with zero and fell in the expected direction. For undirected effect size measures, only four out of 59 credible intervals contained values greater than 0.10 (10{\%} of variance explained) and only 19 intervals contained values larger than 0.05. Second, a Bayesian random-effects meta-analysis for all 38 t-tests showed that only one out of the 38 hierarchically estimated credible intervals did not overlap with zero and fell in the expected direction. Third, a Bayes factor hypothesis test was used to quantify the evidence for the null hypothesis against a default one-sided alternative. Only seven out of 60 Bayes factors indicated non-anecdotal support in favour of the alternative hypothesis (BF10 {\textgreater}3), whereas 51 Bayes factors indicated at least some support for the null hypothesis. We hope that future analyses of replication success will embrace a more inclusive statistical approach by adopting a wider range of complementary techniques.},
author = {Marsman, Maarten and Sch{\"{o}}nbrodt, Felix D. and Morey, Richard D. and Yao, Yuling and Gelman, Andrew and Wagenmakers, Eric Jan},
doi = {10.1098/rsos.160426},
issn = {20545703},
journal = {Royal Society Open Science},
number = {1},
title = {{A Bayesian bird's eye view of 'Replications of important results in social psychology'}},
volume = {4},
year = {2017}
}
@article{Xu2020,
author = {Xu, Chengxin and Liu, Yixin},
doi = {10.31234/OSF.IO},
journal = {PsyArXiv},
keywords = {Politics,Prejudice and Discrimination,Social and Behavioral Sciences,Social and Personality Psychology,discrimination,political psychology,social psychology,social stigma,xenophobia},
publisher = {PsyArXiv},
title = {{Social Cost with No Political Gain: The "Chinese Virus" Effect}},
url = {https://psyarxiv.com/9g5wk/},
year = {2020}
}
@article{Shaw2020,
author = {Shaw, Mairead and Cloos, Leonie and Luong, Raymond and Elbaz, Sasha and Flake, Jessica},
doi = {10.31234/osf.io/kdurz},
journal = {PsyArXiv},
title = {{Measurement Practices in Large-Scale Replications: Insights from Many Labs 2}},
url = {https://psyarxiv.com/kdurz/},
year = {2020}
}
@article{Zitzmann2020,
abstract = {This journal recently published a systematic review of simulation studies on the performance of Bayesian approaches for estimating latent variable models in small samples. The authors of this revie...},
author = {Zitzmann, Steffen and L{\"{u}}dtke, Oliver and Robitzsch, Alexander and Hecht, Martin},
doi = {10.1080/10705511.2020.1752216},
issn = {1070-5511},
journal = {Structural Equation Modeling: A Multidisciplinary Journal},
keywords = {Bayesian estimation,Markov chain Monte Carlo,small sample,structural equation modeling},
month = {may},
pages = {1--11},
publisher = {Routledge},
title = {{On the Performance of Bayesian Approaches in Small Samples: A Comment on Smid, McNeish, Miocevic, and van de Schoot (2020)}},
url = {https://www.tandfonline.com/doi/full/10.1080/10705511.2020.1752216},
year = {2020}
}
@article{Hughes2019,
abstract = {Background: Missing data are unavoidable in epidemiological research, potentially leading to bias and loss of precision. Multiple imputation (MI) is widely advocated as an improvement over complete case analysis (CCA). However, contrary to widespread belief, CCA is preferable to MI in some situations. Methods: We provide guidance on choice of analysis when data are incomplete. Using causal diagrams to depict missingness mechanisms, we describe when CCA will not be biased by missing data and compare MI and CCA, with respect to bias and efficiency, in a range of missing data situations. We illustrate selection of an appropriate method in practice. Results: For most regression models, CCA gives unbiased results when the chance of being a complete case does not depend on the outcome after taking the covariates into consideration, which includes situations where data are missing not at random. Consequently, there are situations in which CCA analyses are unbiased while MI analyses, assuming missing at random (MAR), are biased. By contrast MI, unlike CCA, is valid for all MAR situations and has the potential to use information contained in the incomplete cases and auxiliary variables to reduce bias and/or improve precision. For this reason, MI was preferred over CCA in our real data example. Conclusions: Choice of method for dealing with missing data is crucial for validity of conclusions, and should be based on careful consideration of the reasons for the missing data, missing data patterns and the availability of auxiliary information.},
author = {Hughes, Rachael A. and Heron, Jon and Sterne, Jonathan A.C. and Tilling, Kate},
doi = {10.1093/ije/dyz032},
issn = {14643685},
journal = {International Journal of Epidemiology},
number = {4},
pmid = {30879056},
title = {{Accounting for missing data in statistical analyses: Multiple imputation is not always the answer}},
volume = {48},
year = {2019}
}
@article{Brown2017,
abstract = {We present a simple mathematical technique that we call granularity-related inconsistency of means (GRIM) for verifying the summary statistics of research reports in psychology. This technique evaluates whether the reported means of integer data such as Likert-type scales are consistent with the given sample size and number of items. We tested this technique with a sample of 260 recent empirical articles in leading journals. Of the articles that we could test with the GRIM technique (N = 71), around half (N = 36) appeared to contain at least one inconsistent mean, and more than 20{\%} (N = 16) contained multiple such inconsistencies. We requested the data sets corresponding to 21 of these articles, receiving positive responses in 9 cases. We confirmed the presence of at least one reporting error in all cases, with three articles requiring extensive corrections. The implications for the reliability and replicability of empirical psychology are discussed.},
author = {Brown, Nicholas J.L. and Heathers, James A.J.},
doi = {10.1177/1948550616673876},
issn = {19485514},
journal = {Social Psychological and Personality Science},
number = {4},
title = {{The GRIM Test: A Simple Technique Detects Numerous Anomalies in the Reporting of Results in Psychology}},
volume = {8},
year = {2017}
}
@article{Grimm2020,
abstract = {Recursive partitioning, also known as decision trees and classification and regression trees (CART), is a machine learning procedure that has gained traction in the behavioral sciences because of i...},
author = {Grimm, Kevin J. and Jacobucci, Ross},
doi = {10.1080/00273171.2020.1751028},
issn = {0027-3171},
journal = {Multivariate Behavioral Research},
keywords = {CART,Machine learning,reliability},
month = {apr},
pages = {1--13},
publisher = {Routledge},
title = {{Reliable Trees: Reliability Informed Recursive Partitioning for Psychological Data}},
url = {https://www.tandfonline.com/doi/full/10.1080/00273171.2020.1751028},
year = {2020}
}
@article{Cairo2020,
abstract = {Selective reporting practices (SRPs)—adding, dropping, or altering study elements when preparing reports for publication—are thought to increase false positives in scientific research. Yet analyses of SRPs have been limited to self-reports or analyses of pre-registered and published studies. To assess SRPs in social psychological research more broadly, we compared doctoral dissertations defended between 1999 and 2017 with the publications based on those dissertations. Selective reporting occurred in nearly 50{\%} of studies. Fully supported dissertation hypotheses were 3 times more likely to be published than unsupported hypotheses, while unsupported hypotheses were nearly 4 times more likely to be dropped from publications. Few hypotheses were found to be altered or added post hoc. Dissertation studies with fewer supported hypotheses were more likely to remove participants or measures from publications. Selective hypothesis reporting and dropped measures significantly predicted greater hypothesis support in published studies, supporting concerns that SRPs may increase Type 1 error risk.},
author = {Cairo, Athena H. and Green, Jeffrey D. and Forsyth, Donelson R. and Behler, Anna Maria C. and Raldiris, Tarah L.},
doi = {10.1177/0146167220903896},
issn = {0146-1672},
journal = {Personality and Social Psychology Bulletin},
keywords = {decision making,hypothesis testing strategies,questionable research practices,selective reporting practices},
month = {feb},
pages = {014616722090389},
publisher = {SAGE Publications Inc.},
title = {{Gray (Literature) Matters: Evidence of Selective Hypothesis Reporting in Social Psychological Research}},
url = {http://journals.sagepub.com/doi/10.1177/0146167220903896},
year = {2020}
}
@inproceedings{Jun2019,
author = {Jun, Eunice and Daum, Maureen and Roesch, Jared and Chasins, Sarah and Berger, Emery and Just, Rene and Reinecke, Katharina},
booktitle = {Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology},
pages = {591--603},
title = {{Tea: A High-level Language and Runtime System for Automating Statistical Analysis}},
year = {2019}
}
@article{Szafir2016,
author = {Szafir, Danielle Albers and Haroz, Steve and Gleicher, Michael and Franconeri, Steven},
issn = {1534-7362},
journal = {Journal of Vision},
number = {5},
pages = {11},
publisher = {The Association for Research in Vision and Ophthalmology},
title = {{Four types of ensemble coding in data visualizations}},
volume = {16},
year = {2016}
}
@inproceedings{Matejka2017,
author = {Matejka, Justin and Fitzmaurice, George},
booktitle = {Proceedings of the 2017 CHI Conference on Human Factors in Computing Systems},
pages = {1290--1294},
title = {{Same stats, different graphs: generating datasets with varied appearance and identical statistics through simulated annealing}},
year = {2017}
}
@article{Moritz2018,
author = {Moritz, Dominik and Wang, Chenglong and Nelson, Greg L and Lin, Halden and Smith, Adam M and Howe, Bill and Heer, Jeffrey},
issn = {1077-2626},
journal = {IEEE Transactions on Visualization and Computer Graphics},
number = {1},
pages = {438--448},
publisher = {IEEE},
title = {{Formalizing visualization design knowledge as constraints: Actionable and extensible models in draco}},
volume = {25},
year = {2018}
}
@article{Dimara2018,
author = {Dimara, Evanthia and Franconeri, Steven and Plaisant, Catherine and Bezerianos, Anastasia and Dragicevic, Pierre},
issn = {1077-2626},
journal = {IEEE Transactions on Visualization and Computer Graphics},
publisher = {IEEE},
title = {{A task-based taxonomy of cognitive biases for information visualization}},
year = {2018}
}
@book{Fife2020,
address = {Glassboro, NJ},
author = {Fife, Dustin A.},
publisher = {QuantPsych},
title = {{The Order of the Statistical Jedi: Responsibilities, Routines, and Rituals}},
year = {2020}
}
@article{Mackinlay1986,
abstract = {The goal of the research described in this paper is to develop an application-independent presentation tool that automatically designs effective graphical presentations (such as bar charts, scatter plots, and connected graphs) of relational information. Two problems are raised by this goal: The codification of graphic design criteria in a form that can be used by the presentation tool, and the generation of a wide variety of designs so that the presentation tool can accommodate a wide variety of information. The approach described in this paper is based on the view that graphical presentations are sentences of graphical languages. The graphic design issues are codified as expressiveness and effectiveness criteria for graphical languages. Expressiveness criteria determine whether a graphical language can express the desired information. Effectiveness criteria determine whether a graphical language exploits the capabilities of the output medium and the human visual system. A wide variety of designs can be systematically generated by using a composition algebra that composes a small set of primitive graphical languages. Artificial intelligence techniques are used to implement a prototype presentation tool called APT (A Presentation Tool), which is based on the composition algebra and the graphic design criteria. {\textcopyright} 1986, ACM. All rights reserved.},
author = {Mackinlay, Jock},
doi = {10.1145/22949.22950},
issn = {15577368},
journal = {ACM Transactions on Graphics (TOG)},
keywords = {Automatic generation,composition algebra,effectiveness,expressiveness,graphic design,information presentation,presentation tool,user interface},
month = {apr},
number = {2},
pages = {110--141},
title = {{Automating the Design of Graphical Presentations of Relational Information}},
url = {http://dl.acm.org/doi/10.1145/22949.22950},
volume = {5},
year = {1986}
}
@article{Weissgerber2015a,
abstract = {Figures in scientific publications are critically important because they often show the data supporting key findings. Our systematic review of research articles published in top physiology journals (n = 703) suggests that, as scientists, we urgently need to change our practices for presenting continuous data in small sample size studies. Papers rarely included scatterplots, box plots, and histograms that allow readers to critically evaluate continuous data. Most papers presented continuous data in bar and line graphs. This is problematic, as many different data distributions can lead to the same bar or line graph. The full data may suggest different conclusions from the summary statistics. We recommend training investigators in data presentation, encouraging a more complete presentation of data, and changing journal editorial policies. Investigators can quickly make univariate scatterplots for small sample size studies using our Excel templates.},
author = {Weissgerber, Tracey L. and Milic, Natasa M. and Winham, Stacey J. and Garovic, Vesna D.},
doi = {10.1371/journal.pbio.1002128},
issn = {1545-7885},
journal = {PLoS biology},
keywords = {*Data Interpretation,Publishing,Statistical},
language = {eng},
month = {apr},
number = {4},
pages = {e1002128--e1002128},
pmid = {25901488},
publisher = {Public Library of Science},
title = {{Beyond bar and line graphs: time for a new data presentation paradigm}},
url = {https://www.ncbi.nlm.nih.gov/pubmed/25901488 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4406565/ https://dx.plos.org/10.1371/journal.pbio.1002128},
volume = {13},
year = {2015}
}
@inproceedings{Kandel2012,
abstract = {Data quality issues such as missing, erroneous, extreme and duplicate values undermine analysis and are time-consuming to find and fix. Automated methods can help identify anomalies, but determining what constitutes an error is context-dependent and so requires human judgment. While visualization tools can facilitate this process, analysts must often manually construct the necessary views, requiring significant expertise. We present Profiler, a visual analysis tool for assessing quality issues in tabular data. Profiler applies data mining methods to automatically flag problematic data and suggests coordinated summary visualizations for assessing the data in context. The system contributes novel methods for integrated statistical and visual analysis, automatic view suggestion, and scalable visual summaries that support real-time interaction with millions of data points. We present Profiler's architecture -- including modular components for custom data types, anomaly detection routines and summary visualizations -- and describe its application to motion picture, natural disaster and water quality data sets. {\textcopyright} 2012 ACM.},
address = {New York, New York, USA},
author = {Kandel, Sean and Parikh, Ravi and Paepcke, Andreas and Hellerstein, Joseph M. and Heer, Jeffrey},
booktitle = {Proceedings of the Workshop on Advanced Visual Interfaces AVI},
doi = {10.1145/2254556.2254659},
isbn = {9781450312875},
keywords = {anomaly detection,data analysis,data quality,visualization},
pages = {547--554},
publisher = {ACM Press},
title = {{Profiler: Integrated statistical analysis and visualization for data quality assessment}},
url = {http://dl.acm.org/citation.cfm?doid=2254556.2254659},
year = {2012}
}
@article{Correll2019,
abstract = {Famous examples such as Anscombe's Quartet highlight that one of the core benefits of visualizations is allowing people to discover visual patterns that might otherwise be hidden by summary statistics. This visual inspection is particularly important in exploratory data analysis, where analysts can use visualizations such as histograms and dot plots to identify data quality issues. Yet, these visualizations are driven by parameters such as histogram bin size or mark opacity that have a great deal of impact on the final visual appearance of the chart, but are rarely optimized to make important features visible. In this paper, we show that data flaws have varying impact on the visual features of visualizations, and that the adversarial or merely uncritical setting of design parameters of visualizations can obscure the visual signatures of these flaws. Drawing on the framework of Algebraic Visualization Design, we present the results of a crowdsourced study showing that common visualization types can appear to reasonably summarize distributional data while hiding large and important flaws such as missing data and extraneous modes. We make use of these results to propose additional best practices for visualizations of distributions for data quality tasks.},
author = {Correll, Michael A and Li, Mingwei and Kindlmann, Gordon and Scheidegger, Carlos},
doi = {10.1109/TVCG.2018.2864907},
issn = {19410506},
journal = {IEEE Transactions on Visualization and Computer Graphics},
keywords = {Graphical perception,data quality,univariate visualizations},
month = {jan},
number = {1},
pages = {830--839},
publisher = {IEEE Computer Society},
title = {{Looks Good to Me: Visualizations As Sanity Checks}},
volume = {25},
year = {2019}
}
@article{Butler1993,
author = {Butler, Darrell L},
journal = {Behavior Research Methods, Instruments, {\&} Computers},
number = {2},
pages = {81--92},
title = {{Graphics in psychology: Pictures, data, and especially concepts}},
volume = {25},
year = {1993}
}
@article{Kyonka2019,
abstract = {Debates about the utility of p values and correct ways to analyze data have inspired new guidelines on statistical inference by the American Psychological Association (APA) and changes in the way results are reported in other scientific journals, but their impact on the Journal of the Experimental Analysis of Behavior (JEAB) has not previously been evaluated. A content analysis of empirical articles published in JEAB between 1992 and 2017 investigated whether statistical and graphing practices changed during that time period. The likelihood that a JEAB article reported a null hypothesis significance test, included a confidence interval, or depicted at least one figure with error bars has increased over time. Features of graphs in JEAB, including the proportion depicting single-subject data, have not changed systematically during the same period. Statistics and graphing trends in JEAB largely paralleled those in mainstream psychology journals, but there was no evidence that changes to APA style had any direct impact on JEAB. In the future, the onus will continue to be on authors, reviewers and editors to ensure that statistical and graphing practices in JEAB continue to evolve without interfering with characteristics that set the journal apart from other scientific journals.},
annote = {This is really focused on applied behavior analysis and this specific journal.},
author = {Kyonka, Elizabeth G.E. and Mitchell, Suzanne H. and Bizo, Lewis A.},
doi = {10.1002/jeab.509},
issn = {19383711},
journal = {Journal of the Experimental Analysis of Behavior},
keywords = {confidence intervals,error bars,graphs,null hypothesis significance testing,statistical reform},
month = {mar},
number = {2},
pages = {155--165},
publisher = {Wiley-Blackwell Publishing Ltd},
title = {{Beyond inference by eye: Statistical and graphing practices in JEAB, 1992-2017}},
volume = {111},
year = {2019}
}
@inproceedings{Hu2018,
abstract = {Generating knowledge from data is an increasingly important activity. This process of data exploration consists of multiple tasks: data ingestion, visualization, statistical analysis, and storytelling. Though these tasks are complementary, analysts often execute them in separate tools. Moreover, these tools have steep learning curves due to their reliance on manual query specification. Here, we describe the design and implementation of DIVE, a web-based system that integrates state-of-the-art data exploration features into a single tool. DIVE contributes a mixed-initiative interaction scheme that combines recommendation with point-and-click manual specification, and a consistent visual language that unifies different stages of the data exploration workflow. In a controlled user study with 67 professional data scientists, we find that DIVE users were significantly more successful and faster than Excel users at completing predefined data visualization and analysis tasks.},
author = {Hu, Kevin and Orghian, Diana and Hidalgo, C{\'{e}}sar},
doi = {10.1145/3209900.3209910},
isbn = {9781450358279},
booktitle = {Proceedings of the Workshop on Human-In-the-Loop Data Analytics, HILDA 2018},
keywords = {Data exploration,Data visualization,Mixed-initiative interfaces,Statistical analysis,Visualization recommendation},
title = {{DIVE: A mixed-initiative system supporting integrated data exploration workflows}},
year = {2018}
}
@article{Pastore2017,
annote = {Take home points:
1. Bar graphs are misleading
2. Summary statistics are misleading.
Shows graphs that reveal more information than just the summary data and/or bar charts.
Nothing too exciting here, but includes a lot of good references},
author = {Pastore, Massimiliano and Lionetti, Francesca and Alto{\`{e}}, Gianmarco},
journal = {Frontiers in Psychology},
doi = {10.3389/fpsyg.2017.01666},
issn = {16641078},
keywords = {Bar chart and box plot,Credibility crisis,Exploratory data analysis,Graphical representation,Statistical reasoning,visualization,visualization; replication crisis,visualization; survey of use,visualization; transparency},
mendeley-tags = {visualization,visualization; replication crisis,visualization; survey of use,visualization; transparency},
month = {sep},
number = {SEP},
publisher = {Frontiers Media S.A.},
title = {{When one shape does not fit all: A commentary essay on the use of graphs in psychological research}},
volume = {8},
year = {2017}
}
@article{Peden2000,
abstract = {In this article, we report a content analysis of data graphs in introductory and upper level psychology textbooks. Three raters classified data graphs as either line, bar, scatter, 100{\%}, or picture graphs. The results indicated that (a) only 5 types of data graphs appear in psychology textbooks, (b) the number of data graphs per page varies considerably in both types of textbook, and (c) comparisons of observed and expected frequencies revealed that proportions of 100{\%} graphs were greater in introductory textbooks and proportions of scatter graphs were greater in upper level textbooks. We discuss implications of these findings for teachers of psychology and authors of undergraduate psychology textbooks.},
annote = {This will be a good one to reference. They quote somebody who said that graphical literacy is as important as reading and writing. They also survey a bunch of textbooks to see how well they address the needs of students.},
author = {Peden, Blaine F and Hausmann, Sarah E},
doi = {10.1207/S15328023TOP2702_03},
journal = {Teaching of Psychology},
keywords = {graphics,reform,statistical reasoning,student training,superiority of,survey of use,teachers of statistics,textbooks: errors,visualization,visualizations in teaching},
mendeley-tags = {graphics,reform,statistical reasoning,student training,superiority of,survey of use,teachers of statistics,textbooks: errors,visualization,visualizations in teaching},
number = {2},
pages = {93--97},
title = {{Data Graphs in Introductory and Upper Level Psychology Textbooks: A Content Analysis}},
volume = {27},
year = {2000}
}
@article{Schild2013,
author = {Schild, A. H. E. and Voracek, M.},
journal = {Research Synthesis Methods},
number = {3},
pages = {209--219},
title = {{Less is less: A systematic review of graph use in meta-analyses}},
url = {https://psycnet.apa.org/record/2013-34734-001},
volume = {4},
year = {2013}
}
@article{Amrhein2019a,
abstract = {Statistical inference often fails to replicate. One reason is that many results may be selected for drawing inference because some threshold of a statistic like the P-value was crossed, leading to biased reported effect sizes. Nonetheless, considerable non-replication is to be expected even without selective reporting, and generalizations from single studies are rarely if ever warranted. Honestly reported results must vary from replication to replication because of varying assumption violations and random variation; excessive agreement itself would suggest deeper problems, such as failure to publish results in conflict with group expectations or desires. A general perception of a “replication crisis” may thus reflect failure to recognize that statistical tests not only test hypotheses, but countless assumptions and the entire environment in which research takes place. Because of all the uncertain and unknown assumptions that underpin statistical inferences, we should treat inferential statistics as highly unstable local descriptions of relations between assumptions and data, rather than as providing generalizable inferences about hypotheses or models. And that means we should treat statistical results as being much more incomplete and uncertain than is currently the norm. Acknowledging this uncertainty could help reduce the allure of selective reporting: Since a small P-value could be large in a replication study, and a large P-value could be small, there is simply no need to selectively report studies based on statistical results. Rather than focusing our study reports on uncertain conclusions, we should thus focus on describing accurately how the study was conducted, what problems occurred, what data were obtained, what analysis methods were used and why, and what output those methods produced.},
author = {Amrhein, Valentin and Trafimow, David and Greenland, Sander},
doi = {10.1080/00031305.2018.1543137},
issn = {15372731},
journal = {American Statistician},
number = {sup1},
title = {{Inferential Statistics as Descriptive Statistics: There Is No Replication Crisis if We Don't Expect Replication}},
volume = {73},
year = {2019}
}
@article{Greenland2019,
abstract = {The present note explores sources of misplaced criticisms of P-values, such as conflicting definitions of “significance levels” and “P-values” in authoritative sources, and the consequent misinterpretation of P-values as error probabilities. It then discusses several properties of P-values that have been presented as fatal flaws: That P-values exhibit extreme variation across samples (and thus are “unreliable”), confound effect size with sample size, are sensitive to sample size, and depend on investigator sampling intentions. These properties are often criticized from a likelihood or Bayesian framework, yet they are exactly the properties P-values should exhibit when they are constructed and interpreted correctly within their originating framework. Other common criticisms are that P-values force users to focus on irrelevant hypotheses and overstate evidence against those hypotheses. These problems are not however properties of P-values but are faults of researchers who focus on null hypotheses and overstate evidence based on misperceptions that p = 0.05 represents enough evidence to reject hypotheses. Those problems are easily seen without use of Bayesian concepts by translating the observed P-value p into the Shannon information (S-value or surprisal) $-\log_{2}(p)$.},
author = {Greenland, Sander},
doi = {10.1080/00031305.2018.1529625},
issn = {15372731},
journal = {American Statistician},
number = {sup1},
title = {{Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values}},
volume = {73},
year = {2019}
}
@article{Merkle2018a,
abstract = {This article describes blavaan, an R package for estimating Bayesian structural equation models (SEMs) via JAGS and for summarizing the results. It also describes a novel parameter expansion approach for estimating specific types of models with residual covariances, which facilitates estimation of these models in JAGS. The methodology and software are intended to provide users with a general means of estimating Bayesian SEMs, both classical and novel, in a straightforward fashion. Users can estimate Bayesian versions of classical SEMs with lavaan syntax, they can obtain state-of-the-art Bayesian fit measures associated with the models, and they can export JAGS code to modify the SEMs as desired. These features and more are illustrated by example, and the parameter expansion approach is explained in detail.},
archivePrefix = {arXiv},
arxivId = {1511.05604},
author = {Merkle, Edgar C. and Rosseel, Yves},
doi = {10.18637/jss.v085.i04},
eprint = {1511.05604},
issn = {15487660},
journal = {Journal of Statistical Software},
keywords = {Bayesian SEM,JAGS,Lavaan,MCMC,Structural equation models},
month = {jun},
number = {1},
pages = {1--30},
publisher = {American Statistical Association},
title = {{blavaan: Bayesian structural equation models via parameter expansion}},
volume = {85},
year = {2018}
}
@incollection{Kaplan2012,
author = {Kaplan, D. and Depaoli, S.},
booktitle = {Handbook of structural equation modeling},
editor = {Hoyle, R. H.},
pages = {650--673},
title = {{Bayesian structural equation modeling}},
url = {https://psycnet.apa.org/record/2012-16551-038},
year = {2012}
}
@article{Assaf2018,
abstract = {While the Bayesian SEM approach is now receiving a strong attention in the literature, tourism studies still heavily rely on the covariance-based approach for SEM estimation. In a recent special issue dedicated to the topic, Zyphur and Oswald (2013) used the term “Bayesian revolution” to describe the rapid growth of the Bayesian approach across multiple social science disciplines. The method introduces several advantages that make SEM estimation more flexible and powerful. We aim in this paper to introduce tourism researchers to the power of the Bayesian approach and discuss its unique advantages over the covariance-based approach. We provide first some foundations of Bayesian estimation and inference. We then present an illustration of the method using a tourism application. The paper also conducts a Monte Carlo simulation to illustrate the performance of the Bayesian approach in small samples and discuss several complicated SEM contexts where the Bayesian approach provides unique advantages.},
author = {Assaf, A. George and Tsionas, Mike and Oh, Haemoon},
doi = {10.1016/j.tourman.2017.07.018},
issn = {02615177},
journal = {Tourism Management},
keywords = {Bayesian approach,Monte Carlo simulation,SEM,Small samples},
month = {feb},
pages = {98--109},
publisher = {Elsevier Ltd},
title = {{The time has come: Toward Bayesian SEM estimation in tourism research}},
volume = {64},
year = {2018}
}
@article{McDonald2002,
abstract = {Principles for reporting analyses using structural equation modeling are reviewed, with the goal of supplying readers with complete and accurate information. It is recommended that every report give a detailed justification of the model used, along with plausible alternatives and an account of identifiability. Nonnormality and missing data problems should also be addressed. A complete set of parameters and their standard errors is desirable, and it will often be convenient to supply the correlation matrix and discrepancies, as well as goodness-of-fit indices, so that readers can exercise independent critical judgment. A survey of fairly representative studies compares recent practice with the principles of reporting recommended here.},
author = {McDonald, Roderick P. and Ho, Moon Ho Ringo},
doi = {10.1037/1082-989X.7.1.64},
issn = {1082989X},
journal = {Psychological Methods},
number = {1},
pages = {64--82},
pmid = {11928891},
publisher = {American Psychological Association Inc.},
title = {{Principles and practice in reporting structural equation analyses}},
volume = {7},
year = {2002}
}
@article{Jackson2009,
abstract = {Reporting practices in 194 confirmatory factor analysis studies (1,409 factor models) published in American Psychological Association journals from 1998 to 2006 were reviewed and compared with established reporting guidelines. Three research questions were addressed: (a) how do actual reporting practices compare with published guidelines? (b) how do researchers report model fit in light of divergent perspectives on the use of ancillary fit indices (e.g., L.-T. Hu {\&} P. M. Bentler, 1999; H. W. Marsh, K.-T. Hau, {\&} Z. Wen, 2004)? and (c) are fit measures that support hypothesized models reported more often than fit measures that are less favorable? Results indicate some positive findings with respect to reporting practices including proposing multiple models a priori and near universal reporting of the chi-square significance test. However, many deficiencies were found such as lack of information regarding missing data and assessment of normality. Additionally, the authors found increases in reported values of some incremental fit statistics and no statistically significant evidence that researchers selectively report measures of fit that support their preferred model. Recommendations for reporting are summarized and a checklist is provided to help editors, reviewers, and authors improve reporting practices. {\textcopyright} 2009 American Psychological Association.},
author = {Jackson, Dennis L. and Gillaspy, J. Arthur and Purc-Stephenson, Rebecca},
doi = {10.1037/a0014694},
issn = {1082989X},
journal = {Psychological Methods},
keywords = {confirmatory factor analysis,construct validation,research methods,statistical reporting,structural equation models},
month = {mar},
number = {1},
pages = {6--23},
pmid = {19271845},
title = {{Reporting Practices in Confirmatory Factor Analysis: An Overview and Some Recommendations}},
volume = {14},
year = {2009}
}
@article{MacCallum1993,
abstract = {For any given covariance structure model, there will often be alternative models that are indistinguishable from the original model in terms of goodness of fit to data. The existence of such equivalent models is almost universally ignored in empirical studies. A study of 53 published applications showed that equivalent models exist routinely, often in large numbers. Detailed study of three applications showed that equivalent models may often offer substantively meaningful alternative explanations of data. The importance of the equivalent model phenomenon and recommendations for managing and confronting the problem in practice are discussed.},
author = {MacCallum, Robert C. and Wegener, Duane T. and Uchino, Bert N. and Fabrigar, Leandre R.},
doi = {10.1037/0033-2909.114.1.185},
issn = {00332909},
journal = {Psychological Bulletin},
month = {jul},
number = {1},
pages = {185--199},
title = {{The Problem of Equivalent Models in Applications of Covariance Structure Analysis}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/8346326},
volume = {114},
year = {1993}
}
@article{Lee1990,
abstract = {This study introduces the replacing rule as a simplification of Stelzl's (1986) four rules for the generation of recursive equivalent models. The replacing rule is applicable to nonrecursive as well as recursive models, and generates equivalent models through the replacement of direct paths with residual correlations, through the replacement of residual correlations with direct paths, or through the inversion of path directions. Examples of the use of the replacing rule are provided, and its advantages over Stelzl's four rules are discussed. {\textcopyright} 1990, Taylor {\&} Francis Group, LLC. All rights reserved.},
author = {Lee, Soonmook and Hershberger, Scott},
doi = {10.1207/s15327906mbr2503_4},
issn = {15327906},
journal = {Multivariate Behavioral Research},
month = {jul},
number = {3},
pages = {313--334},
title = {{A Simple Rule for Generating Equivalent Models in Covariance Structure Modeling}},
volume = {25},
year = {1990}
}
@article{Jiang2017,
abstract = {When the assumption of multivariate normality is violated and the sample sizes are relatively small, existing test statistics such as the likelihood ratio statistic and Satorra–Bentler's rescaled and adjusted statistics often fail to provide reliable assessment of overall model fit. This article proposes four new corrected statistics, aiming for better model evaluation with nonnormally distributed data at small sample sizes. A Monte Carlo study is conducted to compare the performances of the four corrected statistics against those of existing statistics regarding Type I error rate. Results show that the performances of the four new statistics are relatively stable compared with those of existing statistics. In particular, Type I error rates of a new statistic are close to the nominal level across all sample sizes under a condition of asymptotic robustness. Other new statistics also exhibit improved Type I error control, especially with nonnormally distributed data at small sample sizes.},
author = {Jiang, Ge and Yuan, Ke Hai},
doi = {10.1080/10705511.2016.1277726},
issn = {15328007},
journal = {Structural Equation Modeling},
keywords = {Satorra–Bentler's corrected statistics,nonnormality,small sample size,test statistic},
number = {4},
pages = {479--494},
publisher = {Routledge},
title = {{Four New Corrected Statistics for SEM With Small Samples and Nonnormally Distributed Data}},
url = {https://doi.org/10.1080/10705511.2016.1277726},
volume = {24},
year = {2017}
}
@article{Hancock2011,
abstract = {A two-step process is commonly used to evaluate data-model fit of latent variable path models, the first step addressing the measurement portion of the model and the second addressing the structural portion of the model. Unfortunately, even if the fit of the measurement portion of the model is perfect, the ability to assess the fit within the structural portion is affected by the quality of the factor-variable relations within the measurement model. The result is that models with poorer quality measurement appear to have better data-model fit, whereas models with better quality measurement appear to have worse data-model fit. The current article illustrates this phenomenon across different classes of fit indices, discusses related structural assessment problems resulting from issues of measurement quality, and endorses a supplemental modeling step evaluating the structural portion of the model in isolation from the measurement model. {\textcopyright} 2011 The Author(s).},
author = {Hancock, Gregory R. and Mueller, Ralph O.},
doi = {10.1177/0013164410384856},
issn = {15523888},
journal = {Educational and Psychological Measurement},
keywords = {covariance structure modeling,latent variable models,structural equation modeling},
number = {2},
pages = {306--324},
title = {{The reliability paradox in assessing structural relations within covariance structure models}},
volume = {71},
year = {2011}
}
@article{Asparouhov2014,
author = {Asparouhov, Tihomir and Muth{\'{e}}n, Bengt},
number = {20},
pages = {1--14},
title = {{Using Mplus individual residual plots for diagnostics and model evaluation in SEM}},
year = {2014}
}
@article{Huang2015,
abstract = {The asymptotically distribution free (ADF) method is often used to estimate parameters or test models without a normal distribution assumption on variables, both in covariance structure analysis and in correlation structure analysis. However, little has been done to study the differences in behaviors of the ADF method in covariance versus correlation structure analysis. The behaviors of 3 test statistics frequently used to evaluate structural equation models with nonnormally distributed variables, the $\chi^2$ test $T_{AGLS}$ and its small-sample variants $T_{YB}$ and $T_{F(AGLS)}$, were compared. Results showed that the ADF method in correlation structure analysis with test statistic $T_{AGLS}$ performs much better at small sample sizes than the corresponding test for covariance structures. In contrast, test statistics $T_{YB}$ and $T_{F(AGLS)}$ under the same conditions generally perform better with covariance structures than with correlation structures. It is proposed that excessively large and variable condition numbers of weight matrices are a cause of poor behavior of ADF test statistics in small samples, and results showed that these condition numbers are systematically increased with substantial increase in variance as sample size decreases. Implications for research and practice are discussed.},
author = {Huang, Yafei and Bentler, Peter M.},
doi = {10.1080/10705511.2014.954078},
issn = {15328007},
journal = {Structural Equation Modeling},
keywords = {asymptotically distribution free,correlation structure analysis,covariance structure analysis},
number = {4},
pages = {489--503},
publisher = {Routledge},
title = {{Behavior of Asymptotically Distribution Free Test Statistics in Covariance Versus Correlation Structure Analysis}},
url = {https://doi.org/10.1080/10705511.2014.954078},
volume = {22},
year = {2015}
}
@article{Heene2011,
abstract = {Fit indices are widely used in order to test the model fit for structural equation models. In a highly influential study, Hu and Bentler (1999) showed that certain cutoff values for these indices could be derived, which, over time, has led to the reification of these suggested thresholds as " golden rules" for establishing the fit or other aspects of structural equation models. The current study shows how differences in unique variances influence the value of the global chi-square model test and the most commonly used fit indices: Root-mean-square error of approximation, standardized root-mean-square residual, and the comparative fit index. Using data simulation, the authors illustrate how the value of the chi-square test, the root-mean-square error of approximation, and the standardized root-mean-square residual are decreased when unique variances are increased although model misspecification is present. For a broader understanding of the phenomenon, the authors used different sample sizes, number of observed variables per factor, and types of misspecification. A theoretical explanation is provided, and implications for the application of structural equation modeling are discussed. {\textcopyright} 2011 American Psychological Association.},
author = {Heene, Moritz and Hilbert, Sven and Draxler, Clemens and Ziegler, Matthias and B{\"{u}}hner, Markus},
doi = {10.1037/a0024917},
issn = {1082989X},
journal = {Psychological Methods},
keywords = {Fit indices,Model fit,Model misfit,Model test,Structural equation modeling},
number = {3},
pages = {319--336},
title = {{Masking Misfit in Confirmatory Factor Analysis by Increasing Unique Variances: A Cautionary Note on the Usefulness of Cutoff Values of Fit Indices}},
volume = {16},
year = {2011}
}
@article{Hayduk2014,
abstract = {Researchers using factor analysis tend to dismiss the significant ill fit of factor models by presuming that if their factor model is close-to-fitting, it is probably close to being properly causally specified. Close fit may indeed result from a model being close to properly causally specified, but close-fitting factor models can also be seriously causally misspecified. This article illustrates a variety of nonfactor causal worlds that are perfectly, but inappropriately, fit by factor models. Seeing nonfactor worlds that are perfectly yet erroneously fit via factor models should help researchers understand that close-to-fitting factor models may seriously misrepresent the world's causal structure. Statistical cautions regarding the factor model's proclivity to fit when it ought not to fit have been insufficiently publicized and are rarely heeded. A research commitment to understanding the world's causal structure, combined with clear examples of factor mismodeling should spur diagnostic assessment of significant factor model failures—including reassessment of published failing factor models.},
author = {Hayduk, Leslie},
doi = {10.1177/0013164414527449},
issn = {15523888},
journal = {Educational and Psychological Measurement},
keywords = {close fit,factor analysis,factor model,structural equation modeling,testing},
number = {6},
pages = {905--926},
title = {{Seeing Perfectly Fitting Factor Models That Are Causally Misspecified: Understanding That Close-Fitting Models Can Be Worse}},
volume = {74},
year = {2014}
}
@article{McNeish2018,
abstract = {Latent variable modeling is a popular and flexible statistical framework. Concomitant with fitting latent variable models is assessment of how well the theoretical model fits the observed data. Although firm cutoffs for these fit indexes are often cited, recent statistical proofs and simulations have shown that these fit indexes are highly susceptible to measurement quality. For instance, a root mean square error of approximation (RMSEA) value of 0.06 (conventionally thought to indicate good fit) can actually indicate poor fit with poor measurement quality (e.g., standardized factors loadings of around 0.40). Conversely, an RMSEA value of 0.20 (conventionally thought to indicate very poor fit) can indicate acceptable fit with very high measurement quality (standardized factor loadings around 0.90). Despite the wide-ranging effect on applications of latent variable models, the high level of technical detail involved with this phenomenon has curtailed the exposure of these important findings to empirical researchers who are employing these methods. This article briefly reviews these methodological studies in minimal technical detail and provides a demonstration to easily quantify the large influence measurement quality has on fit index values and how greatly the cutoffs would change if they were derived under an alternative level of measurement quality. Recommendations for best practice are also discussed.},
author = {McNeish, Daniel and An, Ji and Hancock, Gregory R.},
doi = {10.1080/00223891.2017.1281286},
issn = {00223891},
journal = {Journal of Personality Assessment},
number = {1},
pages = {43--52},
title = {{The Thorny Relation Between Measurement Quality and Fit Index Cutoffs in Latent Variable Models}},
volume = {100},
year = {2018}
}
@article{Tomarken2005,
abstract = {Because structural equation modeling (SEM) has become a very popular data-analytic technique, it is important for clinical scientists to have a balanced perception of its strengths and limitations. We review several strengths of SEM, with a particular focus on recent innovations (e.g., latent growth modeling, multilevel SEM models, and approaches for dealing with missing data and with violations of normality assumptions) that underscore how SEM has become a broad data-analytic framework with flexible and unique capabilities. We also consider several limitations of SEM and some misconceptions that it tends to elicit. Major themes emphasized are the problem of omitted variables, the importance of lower-order model components, potential limitations of models judged to be well fitting, the inaccuracy of some commonly used rules of thumb, and the importance of study design. Throughout, we offer recommendations for the conduct of SEM analyses and the reporting of results.},
author = {Tomarken, Andrew J. and Waller, Niels G.},
doi = {10.1146/annurev.clinpsy.1.102803.144239},
issn = {1548-5943},
journal = {Annual Review of Clinical Psychology},
keywords = {causal models,confirmatory factor analysis,covariance structure analysis,latent variables,path analysis,SEM,statistical modeling},
number = {1},
pages = {31--65},
pmid = {17716081},
title = {{Structural Equation Modeling: Strengths, Limitations, and Misconceptions}},
volume = {1},
year = {2005}
}
@article{Hallgren2019a,
abstract = {Introduction: Structural equation modeling (SEM) is a multivariate data analytic technique used in many domains of addictive behaviors research. SEM results are usually summarized and communicated through statistical tables and path diagrams, which emphasize path coefficients and global fit without showing specific quantitative values of data points that underlie the model results. Data visualization methods are often absent in SEM research, which may limit the quality and impact of SEM research by reducing data transparency, obscuring unexpected data anomalies and unmodeled heterogeneity, and inhibiting the communication of SEM research findings to research stakeholders who do not have advanced statistical training in SEM. Methods and results: In this report, we show how data visualization methods can address these limitations and improve the quality of SEM-based addictive behaviors research. We first introduce SEM and data visualization methodologies and differentiate data visualizations from model visualizations that are commonly used in SEM, such as path diagrams. We then discuss ways researchers may utilize data visualization in SEM research, including by obtaining estimates of latent variables and by visualizing multivariate relations in two-dimensional figures. R syntax is provided to help others generate data visualizations for several types of effects commonly modeled in SEM, including correlation, regression, moderation, and simple mediation. Discussion: The techniques outlined here may help spur the use of data visualization in SEM-based addictive behaviors research. Using data visualization in SEM may enhance methodological transparency and improve communication of research findings.},
author = {Hallgren, Kevin A. and McCabe, Connor J. and King, Kevin M. and Atkins, David C.},
doi = {10.1016/j.addbeh.2018.08.030},
issn = {18736327},
journal = {Addictive Behaviors},
keywords = {Applied data analysis,Data visualization,Latent variable modeling,Mediation,Moderation,Structural equation model},
number = {March 2018},
pages = {74--82},
publisher = {Elsevier},
title = {{Beyond path diagrams: Enhancing applied structural equation modeling research through data visualization}},
url = {https://doi.org/10.1016/j.addbeh.2018.08.030},
volume = {94},
year = {2019}
}
@article{McIntosh2007,
abstract = {With seemingly few exceptions, current practice in structural equation modelling (SEM) aims at establishing close rather than exact fit between hypothetical models and observed data. This orientation has gone without serious challenge until the appearance of a sharp critique by Barrett (2007), who suggests discontinuing the use of approximate fit indices (AFIs) in SEM. The present article provides a commentary and elaboration on the key aspects of Barrett's position, and also supplies further practical guidance and methodological references to applied researchers, who may be motivated to significantly alter their modelling practices in order to address the issues he raises. I strongly support his calls for performing more detailed diagnostic examinations of model misfit when confronted with a significant chi-square ($\chi^2$) test statistic, rather than simply deferring to AFIs. However, I do not second the recommendation that assessments of a model's predictive accuracy (e.g., $R^2$ values) can supplant a focused search for the reasons underlying significant global misfit. Accordingly, some misconceptions about the relationship between global model fit and predictive accuracy are pointed out, and modified advice is given to practitioners. Issues surrounding how to properly appraise a model yielding a non-significant $\chi^2$ are also discussed, as are concerns raised by Barrett about small sample size and power in SEM. It is concluded that AFIs offer little value-added in SEM practice, given the wide variety of available methods for performing detailed model assessments. However, I leave the issue of whether AFIs should be completely abandoned to future research. {\textcopyright} 2006 Elsevier Ltd. All rights reserved.},
author = {McIntosh, Cameron N.},
doi = {10.1016/j.paid.2006.09.020},
issn = {01918869},
journal = {Personality and Individual Differences},
keywords = {Approximate fit indices,Exact fit,Structural equation modelling},
number = {5},
pages = {859--867},
title = {{Rethinking fit assessment in structural equation modelling: A commentary and elaboration on Barrett (2007)}},
volume = {42},
year = {2007}
}
@article{Steiger2007,
abstract = {Barrett's (2007) article on "adjudging model fit" raises some important issues concerning the use of global fit indices to justify weak structural equation models, and recommends prohibition of future use of such indices. In this commentary, I critique Barrett's presentation, and show that his recommendations are (a) unnecessarily regressive, and (b) likely to be ignored. Then I suggest a constructive alternative in line with the spirit of his concerns. {\textcopyright} 2006 Elsevier Ltd. All rights reserved.},
author = {Steiger, James H.},
doi = {10.1016/j.paid.2006.09.017},
issn = {01918869},
journal = {Personality and Individual Differences},
keywords = {Confidence interval estimation,Fit indices,Hypothesis testing,Structural equation modeling},
number = {5},
pages = {893--898},
title = {{Understanding the limitations of global fit assessment in structural equation modeling}},
volume = {42},
year = {2007}
}
@article{Rose2017,
abstract = {The purpose of this study was to evaluate the sensitivity of selected fit index statistics in determining model fit in structural equation modeling (SEM). The results indicated a large dependency on correlation magnitude of the input correlation matrix, with mixed results when the correlation magnitudes were low and a primary indication of good model fit. This was due to the default SEM method of Maximum Likelihood that assumes unstandardized correlation values. However, this warning is not well-known, and is only obscurely mentioned in some textbooks. Many SEM computer software programs do not give appropriate error indications that the results are unsubstantiated when standardized correlation values are provided.},
author = {Rose, Sarah A. and Markman, Barry and Sawilowsky, Shlomo},
doi = {10.22237/jmasm/1493597040},
issn = {15389472},
journal = {Journal of Modern Applied Statistical Methods},
keywords = {CFI,Covariance matrices,Fit indices,RMSEA,SEM,SRMR,Structural equation model},
number = {1},
pages = {69--85},
title = {{Limitations in the systematic analysis of structural equation model fit indices}},
volume = {16},
year = {2017}
}
@article{Kaplan2010,
author = {Kaplan, David},
doi = {10.1207/s15327906mbr2502},
number = {May 2014},
pages = {37--41},
journal = {Multivariate Behavioral Research},
title = {{Evaluating and Modifying Covariance Structure Models: A Review and Recommendation}},
volume = {3171},
year = {2010}
}
@article{West2012,