@article{Hubbard2019,
abstract = {Recent efforts by the American Statistical Association to improve statistical practice, especially in countering the misuse and abuse of null hypothesis significance testing (NHST) and p-values, are to be welcomed. But will they be successful? The present study offers compelling evidence that this will be an extraordinarily difficult task. Dramatic citation-count data on 25 articles and books severely critical of NHST's negative impact on good science, underlining that this issue was/is well known, did nothing to stem its usage over the period 1960–2007. On the contrary, employment of NHST increased during this time. To be successful in this endeavor, as well as restoring the relevance of the statistics profession to the scientific community in the 21st century, the ASA must be prepared to dispense detailed advice. This includes specifying those situations, if they can be identified, in which the p-value plays a clearly valuable role in data analysis and interpretation. The ASA might also consider a statement that recommends abandoning the use of p-values.},
author = {Hubbard, R.},
doi = {10.1080/00031305.2018.1497540},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{Will the ASA's Efforts to Improve Statistical Practice be Successful? Some Evidence to the Contrary}},
volume = {73},
year = {2019}
}
@article{Pogrow2019,
abstract = {Relying on effect size as a measure of practical significance is turning out to be just as misleading as using p-values to determine the effectiveness of interventions for improving clinical practice in complex organizations such as schools. This article explains how effect sizes have misdirected practice in education and other disciplines. Even when effect size is incorporated into RCT research the recommendations of whether interventions are effective are misleading and generally useless to practitioners. As a result, a new criterion of practical benefit is recommended for evaluating research findings about the effectiveness of interventions in complex organizations where benchmarks of existing performance exist. Practical benefit exists when the unadjusted performance of an experimental group provides a noticeable advantage over an existing benchmark. Some basic principles for determining practical benefit are provided. Practical benefit is more intuitive and is expected to enable leaders to make more accurate assessments as to whether published research findings are likely to produce noticeable improvements in their organizations. In addition, practical benefit is used routinely as the research criterion for the alternative scientific methodology of improvement science that has an established track record of being a more efficient way to develop new interventions that improve practice dramatically than RCT research. Finally, the problems with practical significance suggest that the research community should seek different inferential methods for research designed to improve clinical performance in complex organizations, as compared to methods for testing theories and medicines.},
author = {Pogrow, S.},
doi = {10.1080/00031305.2018.1549101},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{How Effect Size (Practical Significance) Misleads Clinical Practice: The Case for Switching to Practical Benefit to Assess Applied Research Findings}},
volume = {73},
year = {2019}
}
@article{Gannon2019,
abstract = {This article argues that researchers do not need to completely abandon the p-value, the best-known significance index, but should instead stop using significance levels that do not depend on sample sizes. A testing procedure is developed using a mixture of frequentist and Bayesian tools, with a significance level that is a function of sample size, obtained from a generalized form of the Neyman–Pearson Lemma that minimizes a linear combination of $\alpha$, the probability of rejecting a true null hypothesis, and $\beta$, the probability of failing to reject a false null, instead of fixing $\alpha$ and minimizing $\beta$. The resulting hypothesis tests do not violate the Likelihood Principle and do not require any constraints on the dimensionalities of the sample space and parameter space. The procedure includes an ordering of the entire sample space and uses predictive probability (density) functions, allowing for testing of both simple and compound hypotheses. Accessible examples are presented to highlight specific characteristics of the new tests.},
author = {Gannon, M. and de Bragan{\c{c}}a Pereira, C. and Polpo, A.},
doi = {10.1080/00031305.2018.1518268},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{Blending Bayesian and Classical Tools to Define Optimal Sample-Size-Dependent Significance Levels}},
volume = {73},
year = {2019}
}
@article{Errington2019,
abstract = {Most scientific research is conducted by small teams of investigators who together formulate hypotheses, collect data, conduct analyses, and report novel findings. These teams operate independently as vertically integrated silos. Here we argue that scientific research that is horizontally distributed can provide substantial complementary value, aiming to maximize available resources, promote inclusiveness and transparency, and increase rigor and reliability. This alternative approach enables researchers to tackle ambitious projects that would not be possible under the standard model. Crowdsourced scientific initiatives vary in the degree of communication between project members from largely independent work curated by a coordination team to crowd collaboration on shared activities. The potential benefits and challenges of large-scale collaboration span the entire research process: ideation, study design, data collection, data analysis, reporting, and peer review. Complementing traditional small science with crowdsourced approaches can accelerate the progress of science and improve the quality of scientific research.},
author = {Errington, T. and Lai, C. and Silberzahn, R. and Uhlmann, E. and Kidwell, M. and Chartier, C. and Riegelman, A. and Nosek, B. and McCarthy, R. and Ebersole, C.},
doi = {10.1177/1745691619850561},
issn = {17456916},
journal = {Perspectives on Psychological Science},
number = {5},
publisher = {SAGE},
title = {{Scientific Utopia III: Crowdsourcing Science}},
volume = {14},
year = {2019}
}
@article{Calin-Jageman2019,
abstract = {The “New Statistics” emphasizes effect sizes, confidence intervals, meta-analysis, and the use of Open Science practices. We present three specific ways in which a New Statistics approach can help improve scientific practice: by reducing overconfidence in small samples, by reducing confirmation bias, and by fostering more cautious judgments of consistency. We illustrate these points through consideration of the literature on oxytocin and human trust, a research area that typifies some of the endemic problems that arise with poor statistical practice.},
author = {Calin-Jageman, R. and Cumming, G.},
doi = {10.1080/00031305.2018.1518266},
issn = {00031305},
journal = {American Statistician},
number = {1},
publisher = {Taylor {\&} Francis},
title = {{The New Statistics for Better Science: Ask How Much, How Uncertain, and What Else Is Known}},
volume = {73},
year = {2019}
}
@article{Lee2018,
abstract = {Conducting scientific inquiry is expected to help students make informed decisions; however, how exactly it can help is rarely explained in science education standards. According to classroom studies, inquiry that students conduct in science classes seems to have little effect on their decision-making. Predetermined values play a large role in students' decision-making, but students do not explore these values or evaluate whether they are appropriate to the particular issue they are deciding, and they often ignore relevant scientific information. We explore how to connect inquiry and values, and how this connection can contribute to informed decision-making based on John Dewey's philosophy. Dewey argues that scientific inquiry should include value judgments and that conducting inquiry can improve the ability to make good value judgments. Value judgment is essential to informed, rational decision-making, and Dewey's ideas can explain how conducting inquiry can contribute to make an informed decision through value judgment. According to Dewey, each value judgment during inquiry is a practical judgment guiding action, and students can improve their value judgments by evaluating their actions during scientific inquiry. Thus, we suggest that students need an opportunity to explore values through scientific inquiry and that practicing value judgment will help informed decision-makings.},
author = {Lee, Eun Ah and Brown, Matthew J.},
doi = {10.1007/s11191-017-9952-9},
issn = {15731901},
journal = {Science and Education},
keywords = {History,Philosophy of Science,Science Education,general},
month = {mar},
number = {1-2},
pages = {63--79},
publisher = {Springer Netherlands},
title = {{Connecting Inquiry and Values in Science Education: An Approach Based on John Dewey's Philosophy}},
url = {https://doi.org/10.1007/s11191-017-9952-9},
volume = {27},
year = {2018}
}
@article{Day2019,
abstract = {In this paper, we consider some of the ways in which personal and professional values shape the ways in which criminal justice professionals practice. Using the example of offender rehabilitation, we suggest that it is the values that we hold that determine how knowledge about what works is defined and how priorities for professional engagement are set. Specifically, we argue that there has been too great an emphasis on a particular set of epistemic values and insufficient attention paid to the importance of prudential and moral or social cultural values in the way in which rehabilitation services have developed. This has resulted in practices that have largely failed to deliver the types of social benefit that are intended, supported by a policy paradigm that privileges particular approaches. The argument is illustrated with reference to the failure to deliver effective rehabilitation to Indigenous peoples in both Australia and New Zealand. (PsycINFO Database Record (c) 2018 APA, all rights reserved)},
author = {Day, Andrew and Tamatea, Armon and Geia, Lynore},
doi = {10.1080/1068316x.2018.1543422},
issn = {1068-316X},
journal = {Psychology, Crime {\&} Law},
keywords = {Accreditation,best practice,community,indigenous,knowledge,offender rehabilitation},
month = {jul},
number = {6},
pages = {577--588},
publisher = {Informa UK Limited},
title = {{Scientific inquiry and offender rehabilitation: the importance of epistemic and prudential values}},
url = {https://www.tandfonline.com/doi/abs/10.1080/1068316X.2018.1543422},
volume = {25},
year = {2019}
}
@book{Longino1990,
author = {Longino, Helen E},
isbn = {0691020515},
publisher = {Princeton University Press},
title = {{Science as social knowledge: Values and objectivity in scientific inquiry}},
year = {1990}
}
@misc{AmericanPsychologicalAssociation2017,
author = {{American Psychological Association}},
booktitle = {American Psychological Association},
title = {{Ethical principles of psychologists and code of conduct}},
url = {https://www.apa.org/ethics/code},
urldate = {2020-08-17},
year = {2017}
}
@article{Hardwicke2018,
abstract = {Registered reports present a substantial departure from traditional publishing models with the goal of enhancing the transparency and credibility of the scientific literature. We map the evolving universe of registered reports to assess their growth, implementation and shortcomings at journals across scientific disciplines.},
author = {Hardwicke, Tom E. and Ioannidis, John P.A.},
journal = {Nature Human Behaviour},
doi = {10.1038/s41562-018-0444-y},
issn = {23973374},
keywords = {Human behaviour,Peer review,Publishing},
month = {nov},
number = {11},
pages = {793--796},
pmid = {31558810},
publisher = {Nature Publishing Group},
title = {{Mapping the universe of registered reports}},
url = {https://www.nature.com/articles/s41562-018-0444-y},
volume = {2},
year = {2018}
}
@article{Gabry2019,
abstract = {Bayesian data analysis is about more than just computing a posterior distribution, and Bayesian visualization is about more than trace plots of Markov chains. Practical Bayesian data analysis, like all data analysis, is an iterative process of model building, inference, model checking and evaluation, and model expansion. Visualization is helpful in each of these stages of the Bayesian workflow and it is indispensable when drawing inferences from the types of modern, high dimensional models that are used by applied researchers.},
archivePrefix = {arXiv},
arxivId = {1709.01449},
author = {Gabry, Jonah and Simpson, Daniel and Vehtari, Aki and Betancourt, Michael and Gelman, Andrew},
doi = {10.1111/rssa.12378},
eprint = {1709.01449},
issn = {1467985X},
journal = {Journal of the Royal Statistical Society. Series A: Statistics in Society},
number = {2},
title = {{Visualization in Bayesian workflow}},
volume = {182},
year = {2019}
}
@article{Hardwicke2020,
abstract = {While some scientists study insects, molecules, brains, or clouds, other scientists study science itself. Meta-research, or research-on-research, is a burgeoning discipline that investigates efficiency, quality, and bias in the scientific ecosystem, topics that have become especially relevant amid widespread concerns about the credibility of the scientific literature. Meta-research may help calibrate the scientific ecosystem toward higher standards by providing empirical evidence that informs the iterative generation and refinement of reform initiatives. We introduce a translational framework that involves (a) identifying problems, (b) investigating problems, (c) developing solutions, and (d) evaluating solutions. In each of these areas, we review key meta-research endeavors and discuss several examples of prior and ongoing work. The scientific ecosystem is perpetually evolving; the discipline of meta-research presents an opportunity to use empirical evidence to guide its development and maximize its potential.},
author = {Hardwicke, Tom E. and Serghiou, Stylianos and Janiaud, Perrine and Danchev, Valentin and Cr{\"{u}}well, Sophia and Goodman, Steven N. and Ioannidis, John P.A.},
journal = {Annual Review of Statistics and Its Application},
doi = {10.1146/annurev-statistics-031219-041104},
issn = {2326831X},
title = {{Calibrating the scientific ecosystem through meta-research}},
volume = {7},
year = {2020}
}
@article{Szollosi2020,
author = {Szollosi, Aba and Kellen, David and Navarro, Danielle J. and Shiffrin, Richard and van Rooij, Iris and {Van Zandt}, Trisha and Donkin, Chris},
doi = {10.1016/j.tics.2019.11.009},
issn = {1879307X},
journal = {Trends in Cognitive Sciences},
keywords = {inference,preregistration,theory development},
month = {feb},
number = {2},
pages = {94--95},
pmid = {31892461},
publisher = {Elsevier Ltd},
title = {{Is Preregistration Worthwhile?}},
url = {https://www.cell.com/trends/cognitive-sciences/abstract/S1364-6613(19)30285-2},
volume = {24},
year = {2020}
}
@article{Nosek2012,
author = {Nosek, Brian A and Spies, Jeffrey R and Motyl, Matt},
issn = {1745-6916},
journal = {Perspectives on Psychological Science},
number = {6},
pages = {615--631},
publisher = {SAGE Publications},
title = {{Scientific utopia: II. Restructuring incentives and practices to promote truth over publishability}},
volume = {7},
year = {2012}
}
@book{Douglas2009,
abstract = {The role of science in policymaking has gained unprecedented stature in the United States, raising questions about the place of science and scientific expertise in the democratic process. Some scientists have been given considerable epistemic authority in shaping policy on issues of great moral and cultural significance, and the politicizing of these issues has become highly contentious. Since World War II, most philosophers of science have purported the concept that science should be "value-free." InScience, Policy and the Value-Free Ideal,Heather E. Douglas argues that such an ideal is neither adequate nor desirable for science. She contends that the moral responsibilities of scientists require the consideration of values even at the heart of science. She lobbies for a new ideal in which values serve an essential function throughout scientific inquiry, but where the role values play is constrained at key points, thus protecting the integrity and objectivity of science. In this vein, Douglas outlines a system for the application of values to guide scientists through points of uncertainty fraught with moral valence. Following a philosophical analysis of the historical background of science advising and the value-free ideal, Douglas defines how values should-and should not-function in science. She discusses the distinctive direct and indirect roles for values in reasoning, and outlines seven senses of objectivity, showing how each can be employed to determine the reliability of scientific claims. Douglas then uses these philosophical insights to clarify the distinction between junk science and sound science to be used in policymaking. In conclusion, she calls for greater openness on the values utilized in policymaking, and more public participation in the policymaking process, by suggesting various models for effective use of both the public and experts in key risk assessments.},
author = {Douglas, Heather},
doi = {10.2307/j.ctt6wrc78},
isbn = {9780822960263},
month = {jul},
publisher = {University of Pittsburgh Press},
title = {{Science, Policy, and the Value-Free Ideal}},
url = {http://www.jstor.org/stable/j.ctt6wrc78},
year = {2009}
}
@incollection{sep-scientific-objectivity,
author = {Reiss, Julian and Sprenger, Jan},
booktitle = {The Stanford Encyclopedia of Philosophy},
edition = {Winter 2017},
editor = {Zalta, Edward N},
howpublished = {\url{https://plato.stanford.edu/archives/win2017/entries/scientific-objectivity/}},
publisher = {Metaphysics Research Lab, Stanford University},
title = {{Scientific Objectivity}},
year = {2017}
}
@article{Williams2006,
abstract = {Objectivity and value freedom have often been conflated in the philosophical and sociological literature. While value freedom construed as an absence of social and moral values in scientific work has been discredited, defenders of value freedom bracket off methodological values or practices from social and moral ones. In this paper I will first show how values exist along a continuum and argue that science is and should be value based. One of these values is necessarily objectivity for science to be possible. However the version of objectivity I will describe is socially situated in methodological practice, but also crucially in the particular purpose of a given science. Objectivity (or its absence) may be transferred vertically from practices, goals, or discourses outside science through several levels to that of the daytoday activities of the scientist. It is also possible for this transfer to occur in the other direction and indeed objectivity can be situated in extrascientific practices and discourses. Objectivity (or its absence) may also be transferred horizontally within particular methodological practice to other disciplines or parts of a discipline. Ultimately a socially situated objectivity is an achievement of the community of science. I will use some brief contemporary and historical illustrations from science and the intersection of science and public policy to show how objectivity has been achieved or failed.},
author = {Williams, Malcolm},
journal = {Social Epistemology},
doi = {10.1080/02691720600807468},
issn = {02691728},
keywords = {Objectivity,Realism,Scientific Method,Truth,Value Freedom},
month = {apr},
number = {2},
pages = {163--180},
publisher = {Routledge},
title = {{Can scientists be objective?}},
url = {https://www.tandfonline.com/doi/abs/10.1080/02691720600807468},
volume = {20},
year = {2006}
}
@article{Lakens2017a,
abstract = {Scientists should be able to provide support for the absence of a meaningful effect. Currently, researchers often incorrectly conclude an effect is absent based a nonsignificant result. A widely recommended approach within a frequentist framework is to test for equivalence. In equivalence tests, such as the two one-sided tests (TOST) procedure discussed in this article, an upper and lower equivalence bound is specified based on the smallest effect size of interest. The TOST procedure can be used to statistically reject the presence of effects large enough to be considered worthwhile. This practical primer with accompanying spreadsheet and R package enables psychologists to easily perform equivalence tests (and power analyses) by setting equivalence bounds based on standardized effect sizes and provides recommendations to prespecify equivalence bounds. Extending your statistical tool kit with equivalence tests is an easy way to improve your statistical and theoretical inferences.},
author = {Lakens, Dani{\"{e}}l},
doi = {10.1177/1948550617697177},
issn = {19485514},
journal = {Social Psychological and Personality Science},
number = {4},
pmid = {28736600},
title = {{Equivalence Tests: A Practical Primer for t Tests, Correlations, and Meta-Analyses}},
volume = {8},
year = {2017}
}
@article{Kuncel2013,
abstract = {In employee selection and academic admission decisions, holistic (clinical) data combination methods continue to be relied upon and preferred by practitioners in our field. This meta-analysis examined and compared the relative predictive power of mechanical methods versus holistic methods in predicting multiple work (advancement, supervisory ratings of performance, and training performance) and academic (grade point average) criteria. There was consistent and substantial loss of validity when data were combined holistically-even by experts who are knowledgeable about the jobs and organizations in question-across multiple criteria in work and academic settings. In predicting job performance, the difference between the validity of mechanical and holistic data combination methods translated into an improvement in prediction of more than 50{\%}. Implications for evidence-based practice are discussed. {\textcopyright} 2013 American Psychological Association.},
author = {Kuncel, Nathan R. and Klieger, David M. and Connelly, Brian S. and Ones, Deniz S.},
doi = {10.1037/a0034156},
issn = {00219010},
journal = {Journal of Applied Psychology},
keywords = {Criterion related validity,Judgment and decision making,Mechanical versus clinical data combination},
number = {6},
pages = {1060--1072},
title = {{Mechanical versus clinical data combination in selection and admissions decisions: A meta-analysis}},
volume = {98},
year = {2013}
}
@article{Claesen2019,
author = {Claesen, A. and Gomes, S. and Tuerlinckx, F. and Vanpaemel, W.},
doi = {10.31234/osf.io/d8wex},
journal = {PsyArXiv},
title = {{Preregistration: Comparing Dream to Reality}},
url = {https://psyarxiv.com/d8wex/},
year = {2019}
}
@article{Kozyreva2020,
abstract = {Abstract: The Internet has evolved into a ubiquitous digital environment in which people communicate, seek information, and make decisions. Online environments are replete with smart, highly adaptive choice architectures designed primarily to maximize commercial interests, capture and sustain users' attention, monetize user data, and predict and influence future behavior. This online landscape holds multiple negative consequences for society, such as a decline in human autonomy, rising incivility in online conversation, the facilitation of political extremism, and the spread of disinformation. Benevolent choice architects working with regulators may curb the worst excesses of manipulative choice architectures, yet the strategic advantages, resources, and data remain with commercial players. One way to address this imbalance is with interventions that empower Internet users to gain some control over their digital environments, in part by boosting their information literacy and their cognitive resistance to manipulation. Our goal is to present a conceptual map of interventions that are based on insights from psychological science. We begin by systematically outlining how online and offline environments differ despite being increasingly inextricable. We then identify four major types of challenges that users encounter in online environments: persuasive and manipulative choice architectures, AI-assisted information architectures, distractive environments, and false and misleading information. Next, we turn to how psychological science can inform interventions to counteract these challenges of the digital world. After distinguishing between three types of behavioral and cognitive interventions— nudges, technocognition, and boosts—we focus in on boosts, of which we identify two main groups: (1) those aimed at enhancing people's agency in their digital environments (e.g., self- nudging, deliberate ignorance) and (2) those aimed at boosting competences of reasoning and resilience to manipulation (e.g., simple decision aids, inoculation). These cognitive tools are designed to foster the civility of online discourse and protect reason and human autonomy against manipulative choice architectures, attention-grabbing techniques, and the spread of false information.},
author = {Kozyreva, A. and Lewandowsky, S. and Hertwig, R.},
doi = {10.31234/OSF.IO},
journal = {PsyArXiv},
keywords = {Meta,science},
pages = {1--126},
publisher = {PsyArXiv},
title = {{Citizens Versus the Internet: Confronting Digital Challenges With Cognitive Tools}},
url = {https://psyarxiv.com/ky4x8/},
year = {2020}
}
@article{Adam2019,
author = {Adam, David},
doi = {10.1126/science.aay1207},
issn = {0036-8075},
journal = {Science},
month = {may},
publisher = {American Association for the Advancement of Science (AAAS)},
title = {{A solution to psychology's reproducibility problem just failed its first test}},
year = {2019}
}
@article{Yamada2018,
abstract = {The reproducibility problem that exists in various academic fields has been discussed in recent years, and it has been revealed that scientists discreetly engage in several questionable research practices (QRPs). For example, the practice of hypothesizing after the results are known (HARKing) involves the reconstruction of hypotheses and stories after results have been obtained (Kerr, 1998) and thereby promotes the retrospective fabrication of favorable hypotheses (cf. Bem, 2004). P-hacking encompasses various untruthful manipulations for obtaining p-values less than 0.05 (Simmons et al., 2011). Such unethical practices dramatically increase the number of false positive findings and thereby encourage the intentional fabrication of evidence as the basis of scientific knowledge and theory, which leads to individual profits for researchers.},
annote = {Not all that insightful. Though it did have some ideas about how you can circumvent the limitations of pre-registration and make it look as if you pre-registered well.},
author = {Yamada, Yuki},
doi = {10.3389/fpsyg.2018.01831},
issn = {1664-1078},
journal = {Frontiers in Psychology},
keywords = {Academic publishing,Misconduct in research,Open science,Preregistration,QRP,pre-registration,pre-registration, criticisms},
month = {sep},
pages = {1831},
publisher = {Frontiers Media S.A.},
title = {{How to Crack Pre-registration: Toward Transparent and Open Science}},
url = {https://www.frontiersin.org/article/10.3389/fpsyg.2018.01831/full},
volume = {9},
year = {2018}
}
@article{Glass1976,
author = {Glass, Gene V},
issn = {0013-189X},
journal = {Educational Researcher},
number = {10},
pages = {3--8},
publisher = {SAGE Publications},
title = {{Primary, secondary, and meta-analysis of research}},
volume = {5},
year = {1976}
}
@article{Browne2000,
abstract = {This paper gives a review of cross-validation methods. The original applications in multiple linear regression are considered first. It is shown how predictive accuracy depends on sample size and the number of predictor variables. Both two-sample and single-sample cross-validation indices are investigated. The application of cross-validation methods to the analysis of moment structures is then justified. An equivalence of a single-sample cross-validation index and the Akaike information criterion is pointed out. It is seen that the optimal number of parameters suggested by both single-sample and two-sample cross-validation indices will depend on sample size. {\textcopyright} 2000 Academic Press.},
author = {Browne, Michael W.},
doi = {10.1006/jmps.1999.1279},
issn = {00222496},
journal = {Journal of Mathematical Psychology},
month = {mar},
number = {1},
pages = {108--132},
publisher = {Academic Press},
title = {{Cross-validation methods}},
volume = {44},
year = {2000}
}
@article{Kerr1998,
abstract = {This article considers a practice in scientific communication termed HARKing (Hypothesizing After the Results are Known). HARKing is defined as presenting a post hoc hypothesis (i.e., one based on or informed by one's results) in one's research report as if it were, in fact, an a priori hypotheses. Several forms of HARKing are identified and survey data are presented that suggests that at least some forms of HARKing are widely practiced and widely seen as inappropriate. I identify several reasons why scientists might HARK. Then I discuss several reasons why scientists ought not to HARK. It is conceded that the question of whether HARKing's costs exceed its benefits is a complex one that ought to be addressed through research, open discussion, and debate. To help stimulate such discussion (and for those such as myself who suspect that HARKing's costs do exceed its benefits), I conclude the article with some suggestions for deterring HARKing. Copyright {\textcopyright} 1998 by Lawrence Erlbaum Associates, Inc.},
author = {Kerr, Norbert L.},
doi = {10.1207/s15327957pspr0203_4},
issn = {10888683},
journal = {Personality and Social Psychology Review},
number = {3},
pages = {196--217},
publisher = {SAGE Publications Inc.},
title = {{HARKing: Hypothesizing after the results are known}},
url = {https://pubmed.ncbi.nlm.nih.gov/15647155/},
volume = {2},
year = {1998}
}
@article{Cribbie2017,
abstract = {Researchers in psychology are frequently confronted with the issue of analysing multiple relationships simultaneously. For example, this could involve multiple outcome variables or multiple predictors in a regression framework. Current recommendations typically steer researchers toward familywise or false-discovery rate Type I error control to limit the probability of incorrectly rejecting the null hypothesis. Stepwise modified-Bonferroni procedures are suggested for following this recommendation. However, longstanding arguments against multiplicity control combined with a modern distaste for null hypothesis significance testing have warranted revisiting this debate. This paper is an exploration of both sides of the multiplicity control debate, with the goal of educating concerned parties regarding best practices for conducting multiple related tests.},
author = {Cribbie, Robert A.},
doi = {10.1037/cbs0000075},
issn = {18792669},
journal = {Canadian Journal of Behavioural Science},
keywords = {Bonferroni,effect sizes,estimation,multiplicity control,null hypothesis significance testing},
month = {jul},
number = {3},
pages = {159--165},
publisher = {American Psychological Association Inc.},
title = {{Multiplicity Control, School Uniforms, and Other Perplexing Debates}},
url = {/record/2017-29525-002},
volume = {49},
year = {2017}
}
@article{VanderLinden2017,
author = {van der Linden, Sander and Chryst, Breanne},
doi = {10.3389/fams.2017.00012},
issn = {2297-4687},
journal = {Frontiers in Applied Mathematics and Statistics},
keywords = {Bayesian evidence synthesis,meta-analysis,reproducibility,social norms},
month = {jun},
pages = {12},
publisher = {Frontiers Media SA},
title = {{No Need for Bayes Factors: A Fully Bayesian Evidence Synthesis}},
url = {http://journal.frontiersin.org/article/10.3389/fams.2017.00012/full},
volume = {3},
year = {2017}
}
@article{Bollen2019a,
abstract = {Structural misspecifications in factor analysis include using the wrong number of factors and omitting cross loadings or correlated errors. The impact of these errors on factor loading estimates is understudied. Factor loadings underlie our assessments of the validity and reliability of indicators. Thus knowing how structural misspecifications affect a factor loading is a key issue. This paper develops analytic conditions of when misspecifications affect Bollen's (1996) model implied instrumental variable, two stage least squares (MIIV-2SLS) estimator of a factor loading. It shows that if an indicator equation is correctly specified, then correlated errors among other measures, mixing up causal indicators with reflective, omitting cross loadings, and omitting direct effects between indicators leave the MIIV-2SLS estimator of the factor loading unchanged. Alternatively, if the indicator or the scaling indicator equation is misspecified, then the loading is unlikely to be robust. The results are illustrated with hypothetical and empirical examples.},
author = {Bollen, Kenneth A.},
doi = {10.1080/10705511.2019.1691005},
issn = {15328007},
journal = {Structural Equation Modeling},
keywords = {Model implied instrumental variables,factor analysis,misspecified models,two stage least squares},
publisher = {Routledge},
title = {{When Good Loadings Go Bad: Robustness in Factor Analysis}},
url = {https://www.tandfonline.com/doi/abs/10.1080/10705511.2019.1691005},
year = {2019}
}
@article{Gelman2018a,
abstract = {A standard mode of inference in social and behavioral science is to establish stylized facts using statistical significance in quantitative studies. However, in a world in which measurements are noisy and effects are small, this will not work: selection on statistical significance leads to effect sizes which are overestimated and often in the wrong direction. After a brief discussion of two examples, one in economics and one in social psychology, we consider the procedural solution of open postpublication review, the design solution of devoting more effort to accurate measurements and within-person comparisons, and the statistical analysis solution of multilevel modeling and reporting all results rather than selection on significance. We argue that the current replication crisis in science arises in part from the ill effects of null hypothesis significance testing being used to study small effects with noisy data. In such settings, apparent success comes easy but truly replicable results require a more serious connection between theory, measurement, and data.},
author = {Gelman, Andrew},
doi = {10.1177/0146167217729162},
issn = {15527433},
journal = {Personality and Social Psychology Bulletin},
number = {1},
title = {{The Failure of Null Hypothesis Significance Testing When Studying Incremental Changes, and What to Do About It}},
volume = {44},
year = {2018}
}
@article{Banks2016,
abstract = {Purpose: Questionable research or reporting practices (QRPs) contribute to a growing concern regarding the credibility of research in the organizational sciences and related fields. Such practices include design, analytic, or reporting practices that may introduce biased evidence, which can have harmful implications for evidence-based practice, theory development, and perceptions of the rigor of science. Design/Methodology/Approach: To assess the extent to which QRPs are actually a concern, we conducted a systematic review to consider the evidence on QRPs. Using a triangulation approach (e.g., by reviewing data from observations, sensitivity analyses, and surveys), we identified the good, the bad, and the ugly. Findings: Of the 64 studies that fit our criteria, 6 appeared to find little to no evidence of engagement in QRPs and the other 58 found more severe evidence (91 {\%}). Implications: Drawing upon the findings, we provide recommendations for future research related to publication practices and academic training. Originality/value: We report findings from studies that suggest that QRPs are not a problem, that QRPs are used at a suboptimal rate, and that QRPs present a threat to the viability of organizational science research.},
author = {Banks, George C. and Rogelberg, Steven G. and Woznyj, Haley M. and Landis, Ronald S. and Rupp, Deborah E.},
journal = {Journal of Business and Psychology},
doi = {10.1007/s10869-016-9456-7},
issn = {08893268},
number = {3},
title = {{Editorial: Evidence on Questionable Research Practices: The Good, the Bad, and the Ugly}},
volume = {31},
year = {2016}
}
@article{Marsman2017a,
abstract = {We applied three Bayesian methods to reanalyse the preregistered contributions to the Social Psychology special issue 'Replications of Important Results in Social Psychology' (Nosek {\&} Lakens. 2014 Registered reports: a method to increase the credibility of published results. Soc. Psychol. 45, 137–141. (doi:10.1027/1864-9335/a000192)). First, individual-experiment Bayesian parameter estimation revealed that for directed effect size measures, only three out of 44 central 95{\%} credible intervals did not overlap with zero and fell in the expected direction. For undirected effect size measures, only four out of 59 credible intervals contained values greater than 0.10 (10{\%} of variance explained) and only 19 intervals contained values larger than 0.05. Second, a Bayesian random-effects meta-analysis for all 38 t-tests showed that only one out of the 38 hierarchically estimated credible intervals did not overlap with zero and fell in the expected direction. Third, a Bayes factor hypothesis test was used to quantify the evidence for the null hypothesis against a default one-sided alternative. Only seven out of 60 Bayes factors indicated non-anecdotal support in favour of the alternative hypothesis (BF10 {\textgreater}3), whereas 51 Bayes factors indicated at least some support for the null hypothesis. We hope that future analyses of replication success will embrace a more inclusive statistical approach by adopting a wider range of complementary techniques.},
author = {Marsman, Maarten and Sch{\"{o}}nbrodt, Felix D. and Morey, Richard D. and Yao, Yuling and Gelman, Andrew and Wagenmakers, Eric Jan},
doi = {10.1098/rsos.160426},
issn = {20545703},
journal = {Royal Society Open Science},
number = {1},
title = {{A Bayesian bird's eye view of 'Replications of important results in social psychology'}},
volume = {4},
year = {2017}
}
@article{Xu2020,
author = {Xu, Chengxin and Liu, Yixin},
doi = {10.31234/OSF.IO},
journal = {PsyArXiv},
keywords = {Politics,Prejudice and Discrimination,Social and Behavioral Sciences,Social and Personality Psychology,discrimination,political psychology,social psychology,social stigma,xenophobia},
publisher = {PsyArXiv},
title = {{Social Cost with No Political Gain: The "Chinese Virus" Effect}},
url = {https://psyarxiv.com/9g5wk/},
year = {2020}
}
@article{Shaw2020,
author = {Shaw, Mairead and Cloos, Leonie and Luong, Raymond and Elbaz, Sasha and Flake, Jessica},
doi = {10.31234/osf.io/kdurz},
journal = {PsyArXiv},
title = {{Measurement Practices in Large-Scale Replications: Insights from Many Labs 2}},
url = {https://psyarxiv.com/kdurz/},
year = {2020}
}
@article{Zitzmann2020,
abstract = {This journal recently published a systematic review of simulation studies on the performance of Bayesian approaches for estimating latent variable models in small samples. The authors of this revie...},
author = {Zitzmann, Steffen and L{\"{u}}dtke, Oliver and Robitzsch, Alexander and Hecht, Martin},
doi = {10.1080/10705511.2020.1752216},
issn = {1070-5511},
journal = {Structural Equation Modeling: A Multidisciplinary Journal},
keywords = {Bayesian estimation,Markov chain Monte Carlo,small sample,structural equation modeling},
month = {may},
pages = {1--11},
publisher = {Routledge},
title = {{On the Performance of Bayesian Approaches in Small Samples: A Comment on Smid, McNeish, Miocevic, and van de Schoot (2020)}},
url = {https://www.tandfonline.com/doi/full/10.1080/10705511.2020.1752216},
year = {2020}
}
@article{Hughes2019,
abstract = {Background: Missing data are unavoidable in epidemiological research, potentially leading to bias and loss of precision. Multiple imputation (MI) is widely advocated as an improvement over complete case analysis (CCA). However, contrary to widespread belief, CCA is preferable to MI in some situations. Methods: We provide guidance on choice of analysis when data are incomplete. Using causal diagrams to depict missingness mechanisms, we describe when CCA will not be biased by missing data and compare MI and CCA, with respect to bias and efficiency, in a range of missing data situations. We illustrate selection of an appropriate method in practice. Results: For most regression models, CCA gives unbiased results when the chance of being a complete case does not depend on the outcome after taking the covariates into consideration, which includes situations where data are missing not at random. Consequently, there are situations in which CCA analyses are unbiased while MI analyses, assuming missing at random (MAR), are biased. By contrast MI, unlike CCA, is valid for all MAR situations and has the potential to use information contained in the incomplete cases and auxiliary variables to reduce bias and/or improve precision. For this reason, MI was preferred over CCA in our real data example. Conclusions: Choice of method for dealing with missing data is crucial for validity of conclusions, and should be based on careful consideration of the reasons for the missing data, missing data patterns and the availability of auxiliary information.},
author = {Hughes, Rachael A. and Heron, Jon and Sterne, Jonathan A.C. and Tilling, Kate},
doi = {10.1093/ije/dyz032},
issn = {14643685},
journal = {International Journal of Epidemiology},
number = {4},
pmid = {30879056},
title = {{Accounting for missing data in statistical analyses: Multiple imputation is not always the answer}},
volume = {48},
year = {2019}
}
@article{Brown2017,
abstract = {We present a simple mathematical technique that we call granularity-related inconsistency of means (GRIM) for verifying the summary statistics of research reports in psychology. This technique evaluates whether the reported means of integer data such as Likert-type scales are consistent with the given sample size and number of items. We tested this technique with a sample of 260 recent empirical articles in leading journals. Of the articles that we could test with the GRIM technique (N = 71), around half (N = 36) appeared to contain at least one inconsistent mean, and more than 20{\%} (N = 16) contained multiple such inconsistencies. We requested the data sets corresponding to 21 of these articles, receiving positive responses in 9 cases. We confirmed the presence of at least one reporting error in all cases, with three articles requiring extensive corrections. The implications for the reliability and replicability of empirical psychology are discussed.},
author = {Brown, Nicholas J.L. and Heathers, James A.J.},
doi = {10.1177/1948550616673876},
issn = {19485514},
journal = {Social Psychological and Personality Science},
number = {4},
title = {{The GRIM Test: A Simple Technique Detects Numerous Anomalies in the Reporting of Results in Psychology}},
volume = {8},
year = {2017}
}
@article{Grimm2020,
abstract = {Recursive partitioning, also known as decision trees and classification and regression trees (CART), is a machine learning procedure that has gained traction in the behavioral sciences because of i...},
author = {Grimm, Kevin J. and Jacobucci, Ross},
doi = {10.1080/00273171.2020.1751028},
issn = {0027-3171},
journal = {Multivariate Behavioral Research},
keywords = {CART,Machine learning,reliability},
month = {apr},
pages = {1--13},
publisher = {Routledge},
title = {{Reliable Trees: Reliability Informed Recursive Partitioning for Psychological Data}},
url = {https://www.tandfonline.com/doi/full/10.1080/00273171.2020.1751028},
year = {2020}
}
@article{Cairo2020,
abstract = {Selective reporting practices (SRPs)—adding, dropping, or altering study elements when preparing reports for publication—are thought to increase false positives in scientific research. Yet analyses of SRPs have been limited to self-reports or analyses of pre-registered and published studies. To assess SRPs in social psychological research more broadly, we compared doctoral dissertations defended between 1999 and 2017 with the publications based on those dissertations. Selective reporting occurred in nearly 50{\%} of studies. Fully supported dissertation hypotheses were 3 times more likely to be published than unsupported hypotheses, while unsupported hypotheses were nearly 4 times more likely to be dropped from publications. Few hypotheses were found to be altered or added post hoc. Dissertation studies with fewer supported hypotheses were more likely to remove participants or measures from publications. Selective hypothesis reporting and dropped measures significantly predicted greater hypothesis support in published studies, supporting concerns that SRPs may increase Type 1 error risk.},
author = {Cairo, Athena H. and Green, Jeffrey D. and Forsyth, Donelson R. and Behler, Anna Maria C. and Raldiris, Tarah L.},
doi = {10.1177/0146167220903896},
issn = {0146-1672},
journal = {Personality and Social Psychology Bulletin},
keywords = {decision making,hypothesis testing strategies,questionable research practices,selective reporting practices},
month = {feb},
pages = {014616722090389},
publisher = {SAGE Publications Inc.},
title = {{Gray (Literature) Matters: Evidence of Selective Hypothesis Reporting in Social Psychological Research}},
url = {http://journals.sagepub.com/doi/10.1177/0146167220903896},
year = {2020}
}
@inproceedings{Jun2019,
author = {Jun, Eunice and Daum, Maureen and Roesch, Jared and Chasins, Sarah and Berger, Emery and Just, Rene and Reinecke, Katharina},
booktitle = {Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology},
pages = {591--603},
title = {{Tea: A High-level Language and Runtime System for Automating Statistical Analysis}},
year = {2019}
}
@article{Szafir2016,
author = {Szafir, Danielle Albers and Haroz, Steve and Gleicher, Michael and Franconeri, Steven},
issn = {1534-7362},
journal = {Journal of Vision},
number = {5},
pages = {11},
publisher = {The Association for Research in Vision and Ophthalmology},
title = {{Four types of ensemble coding in data visualizations}},
volume = {16},
year = {2016}
}
@inproceedings{Matejka2017,
author = {Matejka, Justin and Fitzmaurice, George},
booktitle = {Proceedings of the 2017 CHI Conference on Human Factors in Computing Systems},
pages = {1290--1294},
title = {{Same stats, different graphs: generating datasets with varied appearance and identical statistics through simulated annealing}},
year = {2017}
}
@article{Moritz2018,
author = {Moritz, Dominik and Wang, Chenglong and Nelson, Greg L and Lin, Halden and Smith, Adam M and Howe, Bill and Heer, Jeffrey},
issn = {1077-2626},
journal = {IEEE Transactions on Visualization and Computer Graphics},
number = {1},
pages = {438--448},
publisher = {IEEE},
title = {{Formalizing visualization design knowledge as constraints: Actionable and extensible models in draco}},
volume = {25},
year = {2018}
}
@article{Dimara2018,
author = {Dimara, Evanthia and Franconeri, Steven and Plaisant, Catherine and Bezerianos, Anastasia and Dragicevic, Pierre},
issn = {1077-2626},
journal = {IEEE Transactions on Visualization and Computer Graphics},
publisher = {IEEE},
title = {{A task-based taxonomy of cognitive biases for information visualization}},
year = {2018}
}
@book{Fife2020,
address = {Glassboro, NJ},
author = {Fife, Dustin A.},
publisher = {QuantPsych},
title = {{The Order of the Statistical Jedi: Responsibilities, Routines, and Rituals}},
year = {2020}
}
@article{Mackinlay1986,
abstract = {The goal of the research described in this paper is to develop an application-independent presentation tool that automatically designs effective graphical presentations (such as bar charts, scatter plots, and connected graphs) of relational information. Two problems are raised by this goal: The codification of graphic design criteria in a form that can be used by the presentation tool, and the generation of a wide variety of designs so that the presentation tool can accommodate a wide variety of information. The approach described in this paper is based on the view that graphical presentations are sentences of graphical languages. The graphic design issues are codified as expressiveness and effectiveness criteria for graphical languages. Expressiveness criteria determine whether a graphical language can express the desired information. Effectiveness criteria determine whether a graphical language exploits the capabilities of the output medium and the human visual system. A wide variety of designs can be systematically generated by using a composition algebra that composes a small set of primitive graphical languages. Artificial intelligence techniques are used to implement a prototype presentation tool called APT (A Presentation Tool), which is based on the composition algebra and the graphic design criteria. {\textcopyright} 1986, ACM. All rights reserved.},
author = {Mackinlay, Jock},
doi = {10.1145/22949.22950},
issn = {15577368},
journal = {ACM Transactions on Graphics (TOG)},
keywords = {Automatic generation,composition algebra,effectiveness,expressiveness,graphic design,information presentation,presentation tool,user interface},
month = {apr},
number = {2},
pages = {110--141},
title = {{Automating the Design of Graphical Presentations of Relational Information}},
url = {http://dl.acm.org/doi/10.1145/22949.22950},
volume = {5},
year = {1986}
}
@article{Weissgerber2015a,
abstract = {Figures in scientific publications are critically important because they often show the data supporting key findings. Our systematic review of research articles published in top physiology journals (n = 703) suggests that, as scientists, we urgently need to change our practices for presenting continuous data in small sample size studies. Papers rarely included scatterplots, box plots, and histograms that allow readers to critically evaluate continuous data. Most papers presented continuous data in bar and line graphs. This is problematic, as many different data distributions can lead to the same bar or line graph. The full data may suggest different conclusions from the summary statistics. We recommend training investigators in data presentation, encouraging a more complete presentation of data, and changing journal editorial policies. Investigators can quickly make univariate scatterplots for small sample size studies using our Excel templates.},
author = {Weissgerber, Tracey L. and Milic, Natasa M. and Winham, Stacey J. and Garovic, Vesna D.},
doi = {10.1371/journal.pbio.1002128},
issn = {1545-7885},
journal = {PLoS biology},
keywords = {*Data Interpretation,Publishing,Statistical},
language = {eng},
month = {apr},
number = {4},
pages = {e1002128--e1002128},
pmid = {25901488},
publisher = {Public Library of Science},
title = {{Beyond bar and line graphs: time for a new data presentation paradigm}},
url = {https://www.ncbi.nlm.nih.gov/pubmed/25901488 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4406565/ https://dx.plos.org/10.1371/journal.pbio.1002128},
volume = {13},
year = {2015}
}
@inproceedings{Kandel2012,
abstract = {Data quality issues such as missing, erroneous, extreme and duplicate values undermine analysis and are time-consuming to find and fix. Automated methods can help identify anomalies, but determining what constitutes an error is context-dependent and so requires human judgment. While visualization tools can facilitate this process, analysts must often manually construct the necessary views, requiring significant expertise. We present Profiler, a visual analysis tool for assessing quality issues in tabular data. Profiler applies data mining methods to automatically flag problematic data and suggests coordinated summary visualizations for assessing the data in context. The system contributes novel methods for integrated statistical and visual analysis, automatic view suggestion, and scalable visual summaries that support real-time interaction with millions of data points. We present Profiler's architecture -- including modular components for custom data types, anomaly detection routines and summary visualizations -- and describe its application to motion picture, natural disaster and water quality data sets. {\textcopyright} 2012 ACM.},
address = {New York, New York, USA},
author = {Kandel, Sean and Parikh, Ravi and Paepcke, Andreas and Hellerstein, Joseph M. and Heer, Jeffrey},
booktitle = {Proceedings of the Workshop on Advanced Visual Interfaces AVI},
doi = {10.1145/2254556.2254659},
isbn = {9781450312875},
keywords = {anomaly detection,data analysis,data quality,visualization},
pages = {547--554},
publisher = {ACM Press},
title = {{Profiler: Integrated statistical analysis and visualization for data quality assessment}},
url = {http://dl.acm.org/citation.cfm?doid=2254556.2254659},
year = {2012}
}
@article{Correll2019,
abstract = {Famous examples such as Anscombe's Quartet highlight that one of the core benefits of visualizations is allowing people to discover visual patterns that might otherwise be hidden by summary statistics. This visual inspection is particularly important in exploratory data analysis, where analysts can use visualizations such as histograms and dot plots to identify data quality issues. Yet, these visualizations are driven by parameters such as histogram bin size or mark opacity that have a great deal of impact on the final visual appearance of the chart, but are rarely optimized to make important features visible. In this paper, we show that data flaws have varying impact on the visual features of visualizations, and that the adversarial or merely uncritical setting of design parameters of visualizations can obscure the visual signatures of these flaws. Drawing on the framework of Algebraic Visualization Design, we present the results of a crowdsourced study showing that common visualization types can appear to reasonably summarize distributional data while hiding large and important flaws such as missing data and extraneous modes. We make use of these results to propose additional best practices for visualizations of distributions for data quality tasks.},
author = {Correll, Michael A and Li, Mingwei and Kindlmann, Gordon and Scheidegger, Carlos},
doi = {10.1109/TVCG.2018.2864907},
issn = {19410506},
journal = {IEEE Transactions on Visualization and Computer Graphics},
keywords = {Graphical perception,data quality,univariate visualizations},
month = {jan},
number = {1},
pages = {830--839},
publisher = {IEEE Computer Society},
title = {{Looks Good to Me: Visualizations As Sanity Checks}},
volume = {25},
year = {2019}
}
@article{Butler1993,
author = {Butler, Darrell L},
journal = {Behavior Research Methods, Instruments, {\&} Computers},
number = {2},
pages = {81--92},
title = {{Graphics in psychology: Pictures, data, and especially concepts}},
volume = {25},
year = {1993}
}
@article{Kyonka2019,
abstract = {Debates about the utility of p values and correct ways to analyze data have inspired new guidelines on statistical inference by the American Psychological Association (APA) and changes in the way results are reported in other scientific journals, but their impact on the Journal of the Experimental Analysis of Behavior (JEAB) has not previously been evaluated. A content analysis of empirical articles published in JEAB between 1992 and 2017 investigated whether statistical and graphing practices changed during that time period. The likelihood that a JEAB article reported a null hypothesis significance test, included a confidence interval, or depicted at least one figure with error bars has increased over time. Features of graphs in JEAB, including the proportion depicting single-subject data, have not changed systematically during the same period. Statistics and graphing trends in JEAB largely paralleled those in mainstream psychology journals, but there was no evidence that changes to APA style had any direct impact on JEAB. In the future, the onus will continue to be on authors, reviewers and editors to ensure that statistical and graphing practices in JEAB continue to evolve without interfering with characteristics that set the journal apart from other scientific journals.},
annote = {This is really focused on applied behavior analysis and this specific journal.},
author = {Kyonka, Elizabeth G.E. and Mitchell, Suzanne H. and Bizo, Lewis A.},
doi = {10.1002/jeab.509},
issn = {19383711},
journal = {Journal of the Experimental Analysis of Behavior},
keywords = {confidence intervals,error bars,graphs,null hypothesis significance testing,statistical reform},
month = {mar},
number = {2},
pages = {155--165},
publisher = {Wiley-Blackwell Publishing Ltd},
title = {{Beyond inference by eye: Statistical and graphing practices in JEAB, 1992-2017}},
volume = {111},
year = {2019}
}
@inproceedings{Hu2018,
abstract = {Generating knowledge from data is an increasingly important activity. This process of data exploration consists of multiple tasks: data ingestion, visualization, statistical analysis, and storytelling. Though these tasks are complementary, analysts often execute them in separate tools. Moreover, these tools have steep learning curves due to their reliance on manual query specification. Here, we describe the design and implementation of DIVE, a web-based system that integrates state-of-the-art data exploration features into a single tool. DIVE contributes a mixed-initiative interaction scheme that combines recommendation with point-and-click manual specification, and a consistent visual language that unifies different stages of the data exploration workflow. In a controlled user study with 67 professional data scientists, we find that DIVE users were significantly more successful and faster than Excel users at completing predefined data visualization and analysis tasks.},
author = {Hu, Kevin and Orghian, Diana and Hidalgo, C{\'{e}}sar},
doi = {10.1145/3209900.3209910},
isbn = {9781450358279},
booktitle = {Proceedings of the Workshop on Human-In-the-Loop Data Analytics, HILDA 2018},
keywords = {Data exploration,Data visualization,Mixed-initiative interfaces,Statistical analysis,Visualization recommendation},
title = {{DIVE: A mixed-initiative system supporting integrated data exploration workflows}},
year = {2018}
}
@article{Pastore2017,
annote = {Take home points:
1. Bar graphs are misleading
2. Summary statistics are misleading.
Shows graphs that reveal more information than just the summary data and/or bar charts.
Nothing too exciting here, but includes a lot of good references},
author = {Pastore, Massimiliano and Lionetti, Francesca and Alto{\`{e}}, Gianmarco},
journal = {Frontiers in Psychology},
doi = {10.3389/fpsyg.2017.01666},
issn = {16641078},
keywords = {Bar chart and box plot,Credibility crisis,Exploratory data analysis,Graphical representation,Statistical reasoning,visualization,visualization; replication crisis,visualization; survey of use,visualization; transparency},
mendeley-tags = {visualization,visualization; replication crisis,visualization; survey of use,visualization; transparency},
month = {sep},
number = {SEP},
publisher = {Frontiers Media S.A.},
title = {{When one shape does not fit all: A commentary essay on the use of graphs in psychological research}},
volume = {8},
year = {2017}
}
@article{Peden2000,
abstract = {In this article, we report a content analysis of data graphs in introductory and upper level psychology textbooks. Three raters classified data graphs as either line, bar, scatter, 100{\%}, or picture graphs. The results indicated that (a) only 5 types of data graphs appear in psychology textbooks, (b) the number of data graphs per page varies considerably in both types of textbook, and (c) comparisons of observed and expected frequencies revealed that proportions of 100{\%} graphs were greater in introductory textbooks and proportions of scatter graphs were greater in upper level textbooks. We discuss implications of these findings for teachers of psychology and authors of undergraduate psychology textbooks.},
annote = {This will be a good one to reference. They quote somebody who said that graphical literacy is as important as reading and writing. They also survey a bunch of textbooks to see how well they address the needs of students.},
author = {Peden, Blaine F and Hausmann, Sarah E},
doi = {10.1207/S15328023TOP2702_03},
journal = {Teaching of Psychology},
keywords = {graphics,reform,statistical reasoning,student training,superiority of,survey of use,teachers of statistics,textbooks: errors,visualization,visualizations in teaching},
mendeley-tags = {graphics,reform,statistical reasoning,student training,superiority of,survey of use,teachers of statistics,textbooks: errors,visualization,visualizations in teaching},
number = {2},
pages = {93--97},
title = {{Data Graphs in Introductory and Upper Level Psychology Textbooks: A Content Analysis}},
volume = {27},
year = {2000}
}
@article{Schild2013,
author = {Schild, A. H. E. and Voracek, M.},
journal = {Research Synthesis Methods},
number = {3},
pages = {209--219},
title = {{Less is less: A systematic review of graph use in meta-analyses}},
url = {https://psycnet.apa.org/record/2013-34734-001},
volume = {4},
year = {2013}
}
@article{Amrhein2019a,
abstract = {Statistical inference often fails to replicate. One reason is that many results may be selected for drawing inference because some threshold of a statistic like the P-value was crossed, leading to biased reported effect sizes. Nonetheless, considerable non-replication is to be expected even without selective reporting, and generalizations from single studies are rarely if ever warranted. Honestly reported results must vary from replication to replication because of varying assumption violations and random variation; excessive agreement itself would suggest deeper problems, such as failure to publish results in conflict with group expectations or desires. A general perception of a “replication crisis” may thus reflect failure to recognize that statistical tests not only test hypotheses, but countless assumptions and the entire environment in which research takes place. Because of all the uncertain and unknown assumptions that underpin statistical inferences, we should treat inferential statistics as highly unstable local descriptions of relations between assumptions and data, rather than as providing generalizable inferences about hypotheses or models. And that means we should treat statistical results as being much more incomplete and uncertain than is currently the norm. Acknowledging this uncertainty could help reduce the allure of selective reporting: Since a small P-value could be large in a replication study, and a large P-value could be small, there is simply no need to selectively report studies based on statistical results. Rather than focusing our study reports on uncertain conclusions, we should thus focus on describing accurately how the study was conducted, what problems occurred, what data were obtained, what analysis methods were used and why, and what output those methods produced.},
author = {Amrhein, Valentin and Trafimow, David and Greenland, Sander},
doi = {10.1080/00031305.2018.1543137},
issn = {15372731},
journal = {American Statistician},
number = {sup1},
title = {{Inferential Statistics as Descriptive Statistics: There Is No Replication Crisis if We Don't Expect Replication}},
volume = {73},
year = {2019}
}
@article{Greenland2019,
abstract = {The present note explores sources of misplaced criticisms of P-values, such as conflicting definitions of “significance levels” and “P-values” in authoritative sources, and the consequent misinterpretation of P-values as error probabilities. It then discusses several properties of P-values that have been presented as fatal flaws: That P-values exhibit extreme variation across samples (and thus are “unreliable”), confound effect size with sample size, are sensitive to sample size, and depend on investigator sampling intentions. These properties are often criticized from a likelihood or Bayesian framework, yet they are exactly the properties P-values should exhibit when they are constructed and interpreted correctly within their originating framework. Other common criticisms are that P-values force users to focus on irrelevant hypotheses and overstate evidence against those hypotheses. These problems are not however properties of P-values but are faults of researchers who focus on null hypotheses and overstate evidence based on misperceptions that p = 0.05 represents enough evidence to reject hypotheses. Those problems are easily seen without use of Bayesian concepts by translating the observed P-value p into the Shannon information (S-value or surprisal) $-\log_{2}(p)$.},
author = {Greenland, Sander},
doi = {10.1080/00031305.2018.1529625},
issn = {15372731},
journal = {American Statistician},
number = {sup1},
title = {{Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values}},
volume = {73},
year = {2019}
}
@article{Merkle2018a,
abstract = {This article describes blavaan, an R package for estimating Bayesian structural equation models (SEMs) via JAGS and for summarizing the results. It also describes a novel parameter expansion approach for estimating specific types of models with residual covariances, which facilitates estimation of these models in JAGS. The methodology and software are intended to provide users with a general means of estimating Bayesian SEMs, both classical and novel, in a straightforward fashion. Users can estimate Bayesian versions of classical SEMs with lavaan syntax, they can obtain state-of-the-art Bayesian fit measures associated with the models, and they can export JAGS code to modify the SEMs as desired. These features and more are illustrated by example, and the parameter expansion approach is explained in detail.},
archivePrefix = {arXiv},
arxivId = {1511.05604},
author = {Merkle, Edgar C. and Rosseel, Yves},
doi = {10.18637/jss.v085.i04},
eprint = {1511.05604},
issn = {15487660},
journal = {Journal of Statistical Software},
keywords = {Bayesian SEM,JAGS,Lavaan,MCMC,Structural equation models},
month = {jun},
number = {1},
pages = {1--30},
publisher = {American Statistical Association},
title = {{blavaan: Bayesian structural equation models via parameter expansion}},
volume = {85},
year = {2018}
}
@incollection{Kaplan2012,
author = {Kaplan, D. and Depaoli, S.},
booktitle = {Handbook of structural equation modeling},
editor = {Hoyle, R. H.},
pages = {650--673},
title = {{Bayesian structural equation modeling}},
url = {https://psycnet.apa.org/record/2012-16551-038},
year = {2012}
}
@article{Assaf2018,
abstract = {While the Bayesian SEM approach is now receiving a strong attention in the literature, tourism studies still heavily rely on the covariance-based approach for SEM estimation. In a recent special issue dedicated to the topic, Zyphur and Oswald (2013) used the term “Bayesian revolution” to describe the rapid growth of the Bayesian approach across multiple social science disciplines. The method introduces several advantages that make SEM estimation more flexible and powerful. We aim in this paper to introduce tourism researchers to the power of the Bayesian approach and discuss its unique advantages over the covariance-based approach. We provide first some foundations of Bayesian estimation and inference. We then present an illustration of the method using a tourism application. The paper also conducts a Monte Carlo simulation to illustrate the performance of the Bayesian approach in small samples and discuss several complicated SEM contexts where the Bayesian approach provides unique advantages.},
author = {Assaf, A. George and Tsionas, Mike and Oh, Haemoon},
doi = {10.1016/j.tourman.2017.07.018},
issn = {02615177},
journal = {Tourism Management},
keywords = {Bayesian approach,Monte Carlo simulation,SEM,Small samples},
month = {feb},
pages = {98--109},
publisher = {Elsevier Ltd},
title = {{The time has come: Toward Bayesian SEM estimation in tourism research}},
volume = {64},
year = {2018}
}
@article{McDonald2002,
abstract = {Principles for reporting analyses using structural equation modeling are reviewed, with the goal of supplying readers with complete and accurate information. It is recommended that every report give a detailed justification of the model used, along with plausible alternatives and an account of identifiability. Nonnormality and missing data problems should also be addressed. A complete set of parameters and their standard errors is desirable, and it will often be convenient to supply the correlation matrix and discrepancies, as well as goodness-of-fit indices, so that readers can exercise independent critical judgment. A survey of fairly representative studies compares recent practice with the principles of reporting recommended here.},
author = {McDonald, Roderick P. and Ho, Moon Ho Ringo},
doi = {10.1037/1082-989X.7.1.64},
issn = {1082989X},
journal = {Psychological Methods},
number = {1},
pages = {64--82},
pmid = {11928891},
publisher = {American Psychological Association Inc.},
title = {{Principles and practice in reporting structural equation analyses}},
volume = {7},
year = {2002}
}
@article{Jackson2009,
abstract = {Reporting practices in 194 confirmatory factor analysis studies (1,409 factor models) published in American Psychological Association journals from 1998 to 2006 were reviewed and compared with established reporting guidelines. Three research questions were addressed: (a) how do actual reporting practices compare with published guidelines? (b) how do researchers report model fit in light of divergent perspectives on the use of ancillary fit indices (e.g., L.-T. Hu {\&} P. M. Bentler, 1999; H. W. Marsh, K.-T. Hau, {\&} Z. Wen, 2004)? and (c) are fit measures that support hypothesized models reported more often than fit measures that are less favorable? Results indicate some positive findings with respect to reporting practices including proposing multiple models a priori and near universal reporting of the chi-square significance test. However, many deficiencies were found such as lack of information regarding missing data and assessment of normality. Additionally, the authors found increases in reported values of some incremental fit statistics and no statistically significant evidence that researchers selectively report measures of fit that support their preferred model. Recommendations for reporting are summarized and a checklist is provided to help editors, reviewers, and authors improve reporting practices. {\textcopyright} 2009 American Psychological Association.},
author = {Jackson, Dennis L. and Gillaspy, J. Arthur and Purc-Stephenson, Rebecca},
doi = {10.1037/a0014694},
issn = {1082989X},
journal = {Psychological Methods},
keywords = {confirmatory factor analysis,construct validation,research methods,statistical reporting,structural equation models},
month = {mar},
number = {1},
pages = {6--23},
pmid = {19271845},
title = {{Reporting Practices in Confirmatory Factor Analysis: An Overview and Some Recommendations}},
volume = {14},
year = {2009}
}
@article{MacCallum1993,
abstract = {For any given covariance structure model, there will often be alternative models that are indistinguishable from the original model in terms of goodness of fit to data. The existence of such equivalent models is almost universally ignored in empirical studies. A study of 53 published applications showed that equivalent models exist routinely, often in large numbers. Detailed study of three applications showed that equivalent models may often offer substantively meaningful alternative explanations of data. The importance of the equivalent model phenomenon and recommendations for managing and confronting the problem in practice are discussed.},
author = {MacCallum, Robert C. and Wegener, Duane T. and Uchino, Bert N. and Fabrigar, Leandre R.},
doi = {10.1037/0033-2909.114.1.185},
issn = {00332909},
journal = {Psychological Bulletin},
month = {jul},
number = {1},
pages = {185--199},
title = {{The Problem of Equivalent Models in Applications of Covariance Structure Analysis}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/8346326},
volume = {114},
year = {1993}
}
@article{Lee1990,
abstract = {This study introduces the replacing rule as a simplification of Stelzl's (1986) four rules for the generation of recursive equivalent models. The replacing rule is applicable to nonrecursive as well as recursive models, and generates equivalent models through the replacement of direct paths with residual correlations, through the replacement of residual correlations with direct paths, or through the inversion of path directions. Examples of the use of the replacing rule are provided, and its advantages over Stelzl's four rules are discussed. {\textcopyright} 1990, Taylor {\&} Francis Group, LLC. All rights reserved.},
author = {Lee, Soonmook and Hershberger, Scott},
doi = {10.1207/s15327906mbr2503_4},
issn = {15327906},
journal = {Multivariate Behavioral Research},
month = {jul},
number = {3},
pages = {313--334},
title = {{A Simple Rule for Generating Equivalent Models in Covariance Structure Modeling}},
volume = {25},
year = {1990}
}
@article{Jiang2017,
abstract = {When the assumption of multivariate normality is violated and the sample sizes are relatively small, existing test statistics such as the likelihood ratio statistic and Satorra–Bentler's rescaled and adjusted statistics often fail to provide reliable assessment of overall model fit. This article proposes four new corrected statistics, aiming for better model evaluation with nonnormally distributed data at small sample sizes. A Monte Carlo study is conducted to compare the performances of the four corrected statistics against those of existing statistics regarding Type I error rate. Results show that the performances of the four new statistics are relatively stable compared with those of existing statistics. In particular, Type I error rates of a new statistic are close to the nominal level across all sample sizes under a condition of asymptotic robustness. Other new statistics also exhibit improved Type I error control, especially with nonnormally distributed data at small sample sizes.},
author = {Jiang, Ge and Yuan, Ke Hai},
doi = {10.1080/10705511.2016.1277726},
issn = {15328007},
journal = {Structural Equation Modeling},
keywords = {Satorra–Bentler's corrected statistics,nonnormality,small sample size,test statistic},
number = {4},
pages = {479--494},
publisher = {Routledge},
title = {{Four New Corrected Statistics for SEM With Small Samples and Nonnormally Distributed Data}},
url = {https://doi.org/10.1080/10705511.2016.1277726},
volume = {24},
year = {2017}
}
@article{Hancock2011,
abstract = {A two-step process is commonly used to evaluate data-model fit of latent variable path models, the first step addressing the measurement portion of the model and the second addressing the structural portion of the model. Unfortunately, even if the fit of the measurement portion of the model is perfect, the ability to assess the fit within the structural portion is affected by the quality of the factor-variable relations within the measurement model. The result is that models with poorer quality measurement appear to have better data-model fit, whereas models with better quality measurement appear to have worse data-model fit. The current article illustrates this phenomenon across different classes of fit indices, discusses related structural assessment problems resulting from issues of measurement quality, and endorses a supplemental modeling step evaluating the structural portion of the model in isolation from the measurement model. {\textcopyright} 2011 The Author(s).},
author = {Hancock, Gregory R. and Mueller, Ralph O.},
doi = {10.1177/0013164410384856},
issn = {15523888},
journal = {Educational and Psychological Measurement},
keywords = {covariance structure modeling,latent variable models,structural equation modeling},
number = {2},
pages = {306--324},
title = {{The reliability paradox in assessing structural relations within covariance structure models}},
volume = {71},
year = {2011}
}
@article{Asparouhov2014,
author = {Asparouhov, Tihomir and Muth{\'{e}}n, Bengt},
number = {20},
pages = {1--14},
title = {{Using Mplus individual residual plots for diagnostics and model evaluation in SEM}},
year = {2014}
}
@article{Huang2015,
abstract = {The asymptotically distribution free (ADF) method is often used to estimate parameters or test models without a normal distribution assumption on variables, both in covariance structure analysis and in correlation structure analysis. However, little has been done to study the differences in behaviors of the ADF method in covariance versus correlation structure analysis. The behaviors of 3 test statistics frequently used to evaluate structural equation models with nonnormally distributed variables, the $\chi^2$ test $T_{AGLS}$ and its small-sample variants $T_{YB}$ and $T_{F(AGLS)}$, were compared. Results showed that the ADF method in correlation structure analysis with test statistic $T_{AGLS}$ performs much better at small sample sizes than the corresponding test for covariance structures. In contrast, test statistics $T_{YB}$ and $T_{F(AGLS)}$ under the same conditions generally perform better with covariance structures than with correlation structures. It is proposed that excessively large and variable condition numbers of weight matrices are a cause of poor behavior of ADF test statistics in small samples, and results showed that these condition numbers are systematically increased with substantial increase in variance as sample size decreases. Implications for research and practice are discussed.},
author = {Huang, Yafei and Bentler, Peter M.},
doi = {10.1080/10705511.2014.954078},
issn = {15328007},
journal = {Structural Equation Modeling},
keywords = {asymptotically distribution free,correlation structure analysis,covariance structure analysis},
number = {4},
pages = {489--503},
publisher = {Routledge},
title = {{Behavior of Asymptotically Distribution Free Test Statistics in Covariance Versus Correlation Structure Analysis}},
url = {https://doi.org/10.1080/10705511.2014.954078},
volume = {22},
year = {2015}
}
@article{Heene2011,
abstract = {Fit indices are widely used in order to test the model fit for structural equation models. In a highly influential study, Hu and Bentler (1999) showed that certain cutoff values for these indices could be derived, which, over time, has led to the reification of these suggested thresholds as " golden rules" for establishing the fit or other aspects of structural equation models. The current study shows how differences in unique variances influence the value of the global chi-square model test and the most commonly used fit indices: Root-mean-square error of approximation, standardized root-mean-square residual, and the comparative fit index. Using data simulation, the authors illustrate how the value of the chi-square test, the root-mean-square error of approximation, and the standardized root-mean-square residual are decreased when unique variances are increased although model misspecification is present. For a broader understanding of the phenomenon, the authors used different sample sizes, number of observed variables per factor, and types of misspecification. A theoretical explanation is provided, and implications for the application of structural equation modeling are discussed. {\textcopyright} 2011 American Psychological Association.},
author = {Heene, Moritz and Hilbert, Sven and Draxler, Clemens and Ziegler, Matthias and B{\"{u}}hner, Markus},
doi = {10.1037/a0024917},
issn = {1082989X},
journal = {Psychological Methods},
keywords = {Fit indices,Model fit,Model misfit,Model test,Structural equation modeling},
number = {3},
pages = {319--336},
title = {{Masking Misfit in Confirmatory Factor Analysis by Increasing Unique Variances: A Cautionary Note on the Usefulness of Cutoff Values of Fit Indices}},
volume = {16},
year = {2011}
}
@article{Hayduk2014,
abstract = {Researchers using factor analysis tend to dismiss the significant ill fit of factor models by presuming that if their factor model is close-to-fitting, it is probably close to being properly causally specified. Close fit may indeed result from a model being close to properly causally specified, but close-fitting factor models can also be seriously causally misspecified. This article illustrates a variety of nonfactor causal worlds that are perfectly, but inappropriately, fit by factor models. Seeing nonfactor worlds that are perfectly yet erroneously fit via factor models should help researchers understand that close-to-fitting factor models may seriously misrepresent the world's causal structure. Statistical cautions regarding the factor model's proclivity to fit when it ought not to fit have been insufficiently publicized and are rarely heeded. A research commitment to understanding the world's causal structure, combined with clear examples of factor mismodeling should spur diagnostic assessment of significant factor model failures—including reassessment of published failing factor models.},
author = {Hayduk, Leslie},
doi = {10.1177/0013164414527449},
issn = {15523888},
journal = {Educational and Psychological Measurement},
keywords = {close fit,factor analysis,factor model,structural equation modeling,testing},
number = {6},
pages = {905--926},
title = {{Seeing Perfectly Fitting Factor Models That Are Causally Misspecified: Understanding That Close-Fitting Models Can Be Worse}},
volume = {74},
year = {2014}
}
@article{McNeish2018,
abstract = {Latent variable modeling is a popular and flexible statistical framework. Concomitant with fitting latent variable models is assessment of how well the theoretical model fits the observed data. Although firm cutoffs for these fit indexes are often cited, recent statistical proofs and simulations have shown that these fit indexes are highly susceptible to measurement quality. For instance, a root mean square error of approximation (RMSEA) value of 0.06 (conventionally thought to indicate good fit) can actually indicate poor fit with poor measurement quality (e.g., standardized factors loadings of around 0.40). Conversely, an RMSEA value of 0.20 (conventionally thought to indicate very poor fit) can indicate acceptable fit with very high measurement quality (standardized factor loadings around 0.90). Despite the wide-ranging effect on applications of latent variable models, the high level of technical detail involved with this phenomenon has curtailed the exposure of these important findings to empirical researchers who are employing these methods. This article briefly reviews these methodological studies in minimal technical detail and provides a demonstration to easily quantify the large influence measurement quality has on fit index values and how greatly the cutoffs would change if they were derived under an alternative level of measurement quality. Recommendations for best practice are also discussed.},
author = {McNeish, Daniel and An, Ji and Hancock, Gregory R.},
doi = {10.1080/00223891.2017.1281286},
issn = {00223891},
journal = {Journal of Personality Assessment},
number = {1},
pages = {43--52},
title = {{The Thorny Relation Between Measurement Quality and Fit Index Cutoffs in Latent Variable Models}},
volume = {100},
year = {2018}
}
@article{Tomarken2005,
abstract = {Because structural equation modeling (SEM) has become a very popular data-analytic technique, it is important for clinical scientists to have a balanced perception of its strengths and limitations. We review several strengths of SEM, with a particular focus on recent innovations (e.g., latent growth modeling, multilevel SEM models, and approaches for dealing with missing data and with violations of normality assumptions) that underscore how SEM has become a broad data-analytic framework with flexible and unique capabilities. We also consider several limitations of SEM and some misconceptions that it tends to elicit. Major themes emphasized are the problem of omitted variables, the importance of lower-order model components, potential limitations of models judged to be well fitting, the inaccuracy of some commonly used rules of thumb, and the importance of study design. Throughout, we offer recommendations for the conduct of SEM analyses and the reporting of results.},
author = {Tomarken, Andrew J. and Waller, Niels G.},
doi = {10.1146/annurev.clinpsy.1.102803.144239},
issn = {1548-5943},
journal = {Annual Review of Clinical Psychology},
keywords = {causal models,confirmatory factor analysis,covariance structure analysis,latent variables,path analysis,SEM,statistical modeling},
number = {1},
pages = {31--65},
pmid = {17716081},
title = {{Structural Equation Modeling: Strengths, Limitations, and Misconceptions}},
volume = {1},
year = {2005}
}
@article{Hallgren2019a,
abstract = {Introduction: Structural equation modeling (SEM) is a multivariate data analytic technique used in many domains of addictive behaviors research. SEM results are usually summarized and communicated through statistical tables and path diagrams, which emphasize path coefficients and global fit without showing specific quantitative values of data points that underlie the model results. Data visualization methods are often absent in SEM research, which may limit the quality and impact of SEM research by reducing data transparency, obscuring unexpected data anomalies and unmodeled heterogeneity, and inhibiting the communication of SEM research findings to research stakeholders who do not have advanced statistical training in SEM. Methods and results: In this report, we show how data visualization methods can address these limitations and improve the quality of SEM-based addictive behaviors research. We first introduce SEM and data visualization methodologies and differentiate data visualizations from model visualizations that are commonly used in SEM, such as path diagrams. We then discuss ways researchers may utilize data visualization in SEM research, including by obtaining estimates of latent variables and by visualizing multivariate relations in two-dimensional figures. R syntax is provided to help others generate data visualizations for several types of effects commonly modeled in SEM, including correlation, regression, moderation, and simple mediation. Discussion: The techniques outlined here may help spur the use of data visualization in SEM-based addictive behaviors research. Using data visualization in SEM may enhance methodological transparency and improve communication of research findings.},
author = {Hallgren, Kevin A. and McCabe, Connor J. and King, Kevin M. and Atkins, David C.},
doi = {10.1016/j.addbeh.2018.08.030},
issn = {18736327},
journal = {Addictive Behaviors},
keywords = {Applied data analysis,Data visualization,Latent variable modeling,Mediation,Moderation,Structural equation model},
number = {March 2018},
pages = {74--82},
publisher = {Elsevier},
title = {{Beyond path diagrams: Enhancing applied structural equation modeling research through data visualization}},
url = {https://doi.org/10.1016/j.addbeh.2018.08.030},
volume = {94},
year = {2019}
}
@article{McIntosh2007,
abstract = {With seemingly few exceptions, current practice in structural equation modelling (SEM) aims at establishing close rather than exact fit between hypothetical models and observed data. This orientation has gone without serious challenge until the appearance of a sharp critique by Barrett (2007), who suggests discontinuing the use of approximate fit indices (AFIs) in SEM. The present article provides a commentary and elaboration on the key aspects of Barrett's position, and also supplies further practical guidance and methodological references to applied researchers, who may be motivated to significantly alter their modelling practices in order to address the issues he raises. I strongly support his calls for performing more detailed diagnostic examinations of model misfit when confronted with a significant chi-square ($\chi^2$) test statistic, rather than simply deferring to AFIs. However, I do not second the recommendation that assessments of a model's predictive accuracy (e.g., $R^2$ values) can supplant a focused search for the reasons underlying significant global misfit. Accordingly, some misconceptions about the relationship between global model fit and predictive accuracy are pointed out, and modified advice is given to practitioners. Issues surrounding how to properly appraise a model yielding a non-significant $\chi^2$ are also discussed, as are concerns raised by Barrett about small sample size and power in SEM. It is concluded that AFIs offer little value-added in SEM practice, given the wide variety of available methods for performing detailed model assessments. However, I leave the issue of whether AFIs should be completely abandoned to future research. {\textcopyright} 2006 Elsevier Ltd. All rights reserved.},
author = {McIntosh, Cameron N.},
doi = {10.1016/j.paid.2006.09.020},
issn = {01918869},
journal = {Personality and Individual Differences},
keywords = {Approximate fit indices,Exact fit,Structural equation modelling},
number = {5},
pages = {859--867},
title = {{Rethinking fit assessment in structural equation modelling: A commentary and elaboration on Barrett (2007)}},
volume = {42},
year = {2007}
}
@article{Steiger2007,
abstract = {Barrett's (2007) article on "adjudging model fit" raises some important issues concerning the use of global fit indices to justify weak structural equation models, and recommends prohibition of future use of such indices. In this commentary, I critique Barrett's presentation, and show that his recommendations are (a) unnecessarily regressive, and (b) likely to be ignored. Then I suggest a constructive alternative in line with the spirit of his concerns. {\textcopyright} 2006 Elsevier Ltd. All rights reserved.},
author = {Steiger, James H.},
doi = {10.1016/j.paid.2006.09.017},
issn = {01918869},
journal = {Personality and Individual Differences},
keywords = {Confidence interval estimation,Fit indices,Hypothesis testing,Structural equation modeling},
number = {5},
pages = {893--898},
title = {{Understanding the limitations of global fit assessment in structural equation modeling}},
volume = {42},
year = {2007}
}
@article{Rose2017,
abstract = {The purpose of this study was to evaluate the sensitivity of selected fit index statistics in determining model fit in structural equation modeling (SEM). The results indicated a large dependency on correlation magnitude of the input correlation matrix, with mixed results when the correlation magnitudes were low and a primary indication of good model fit. This was due to the default SEM method of Maximum Likelihood that assumes unstandardized correlation values. However, this warning is not well-known, and is only obscurely mentioned in some textbooks. Many SEM computer software programs do not give appropriate error indications that the results are unsubstantiated when standardized correlation values are provided.},
author = {Rose, Sarah A. and Markman, Barry and Sawilowsky, Shlomo},
doi = {10.22237/jmasm/1493597040},
issn = {15389472},
journal = {Journal of Modern Applied Statistical Methods},
keywords = {CFI,Covariance matrices,Fit indices,RMSEA,SEM,SRMR,Structural equation model},
number = {1},
pages = {69--85},
title = {{Limitations in the systematic analysis of structural equation model fit indices}},
volume = {16},
year = {2017}
}
@article{Kaplan2010,
author = {Kaplan, David},
doi = {10.1207/s15327906mbr2502},
number = {May 2014},
pages = {37--41},
journal = {Multivariate Behavioral Research},
title = {{Evaluating and Modifying Covariance Structure Models: A Review and Recommendation}},
volume = {3171},
year = {2010}
}
@article{West2012,