diff --git a/CHANGELOG.md b/CHANGELOG.md index 345c4f15..a8a0cabe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,31 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- 23 new distance/similarity + 1. Dennis + 2. Digby + 3. Dispersion + 4. Doolittle + 5. Eyraud + 6. Fager & McGowan + 7. Faith + 8. Fleiss-Levin-Paik + 9. Forbes I + 10. Forbes II + 11. Fossum + 12. Gilbert & Wells + 13. Goodall + 14. Goodman & Kruskal's Lambda + 15. Goodman & Kruskal Lambda-r + 16. Guttman's Lambda A + 17. Guttman's Lambda B + 18. Hamann + 19. Harris & Lahey + 20. Hawkins & Dotson + 21. Kendall's Tau + 22. Kent & Foster I + 23. Kent & Foster II ### Changed - `sort` parameter added to `relabel` method - Document modified diff --git a/Document/Distance.ipynb b/Document/Distance.ipynb index 3277728e..2d9e7622 100644 --- a/Document/Distance.ipynb +++ b/Document/Distance.ipynb @@ -4,7 +4,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "

Please cite us if you use the software

" + "

Please cite us if you use the software

\n", + "\n", + "\n", + " \n", + " " ] }, { @@ -1578,57 +1588,1022 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## References" + "## Dennis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "
1- C. C. Little, \"Abydos Documentation,\" 2018.
\n", - "\n", - "
2- V. Dallmeier, C. Lindig, and A. Zeller, \"Lightweight defect localization for Java,\" in European conference on object-oriented programming, 2005: Springer, pp. 528-550.
\n", - "\n", - "
3- R. Abreu, P. Zoeteweij, and A. J. Van Gemund, \"An evaluation of similarity coefficients for software fault localization,\" in 2006 12th Pacific Rim International Symposium on Dependable Computing (PRDC'06), 2006: IEEE, pp. 39-46.
\n", - "\n", - "
4- M. R. Anderberg, Cluster analysis for applications: probability and mathematical statistics: a series of monographs and textbooks. Academic press, 2014.
\n", - "\n", - "
5- A. M. Andrés and P. F. Marzo, \"Delta: A new measure of agreement between two raters,\" British journal of mathematical and statistical psychology, vol. 57, no. 1, pp. 1-19, 2004.
\n", - "\n", - "
6- C. Baroni-Urbani and M. W. Buser, \"Similarity of binary data,\" Systematic Zoology, vol. 25, no. 3, pp. 251-259, 1976.
\n", - "\n", - "
7- V. Batagelj and M. Bren, \"Comparing resemblance measures,\" Journal of classification, vol. 12, no. 1, pp. 73-90, 1995.
\n", - "\n", - "
8- F. B. Baulieu, \"A classification of presence/absence based dissimilarity coefficients,\" Journal of Classification, vol. 6, no. 1, pp. 233-246, 1989.
\n", - "\n", - "
9- F. B. Baulieu, \"Two variant axiom systems for presence/absence based dissimilarity coefficients,\" Journal of Classification, vol. 14, no. 1, pp. 0159-0170, 1997.
\n", - "\n", - "
10- R. Benini, Principii di demografia. Barbera, 1901.
\n", - "\n", - "
11- G. N. Lance and W. T. Williams, \"Computer programs for hierarchical polythetic classification (“similarity analyses”),\" The Computer Journal, vol. 9, no. 1, pp. 60-64, 1966.
\n", - "\n", - "
12- G. N. Lance and W. T. Williams, \"Mixed-Data Classificatory Programs I - Agglomerative Systems,\" Australian Computer Journal, vol. 1, no. 1, pp. 15-20, 1967.
\n", - "\n", - "
13- P. W. Clement, \"A formula for computing inter-observer agreement,\" Psychological Reports, vol. 39, no. 1, pp. 257-258, 1976.
\n", - "\n", - "
14- V. Consonni and R. Todeschini, \"New similarity coefficients for binary data,\" Match-Communications in Mathematical and Computer Chemistry, vol. 68, no. 2, p. 581, 2012.
" + "Dennis similarity [[15]](#ref15)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "\n", - "" + "$$sim_{Dennis} =\n", + "\\frac{TP-\\frac{(TP+FP)\\times(TP+FN)}{POP}}{\\sqrt{\\frac{(TP+FP)\\times(TP+FN)}{POP}}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Dennis)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Digby" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Digby correlation [[16]](#ref16)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$corr_{Digby} =\n", + "\\frac{(TP \\times TN) ^\\frac{3}{4}-(FP \\times FN)^\\frac{3}{4}}{(TP \\times TN)^\\frac{3}{4}+(FP \\times FN)^\\frac{3}{4}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Digby)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dispersion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dispersion correlation [[17]](#ref17)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$corr_{dispersion} =\n", + "\\frac{TP \\times TN -FP \\times FN}{POP^2}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Dispersion)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Doolittle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Doolittle similarity [[18]](#ref18)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Doolittle} =\n", + "\\frac{(TP\\times POP - (TP+FP)\\times(TP+FN))^2}{(TP+FP)\\times(TP+FN)\\times(FP+TN)\\times(FN+TN)}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Doolittle)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Eyraud" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Eyraud similarity [[19]](#ref19)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Eyraud} =\n", + "\\frac{TP-(TP+FP)\\times(TP+FN)}{(TP+FP)\\times(TP+FN)\\times(FP+TN)\\times(FN+TN)}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Eyraud)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fager & McGowan" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fager & McGowan similarity [[20]](#ref20) [[21]](#ref21)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{FagerMcGowan} =\n", + "\\frac{TP}{\\sqrt{(TP+FP)\\times(TP+FN)}} - \\frac{1}{2\\sqrt{max(TP+FP, TP+FN)}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.FagerMcGowan)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Faith" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Faith similarity [[22]](#ref22)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Faith} =\n", + "\\frac{TP+\\frac{TN}{2}}{POP}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Faith)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fleiss-Levin-Paik" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fleiss-Levin-Paik similarity [[23]](#ref23)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{FleissLevinPaik} =\n", + "\\frac{2 \\times TN}{2 \\times TN + FP + FN}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.FleissLevinPaik)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Forbes I" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Forbes I similarity [[24]](#ref24) [[25]](#ref25)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{ForbesI} =\n", + "\\frac{POP \\times TP}{(TP+FP)\\times(TP+FN)}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.ForbesI)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Forbes II" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Forbes II correlation [[26]](#ref26)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$corr_{ForbesII} =\n", + "\\frac{FP \\times FN-TP \\times TN}{(TP+FP)\\times(TP+FN) - POP \\times min(TP+FP, TP+FN)}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.ForbesII)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fossum" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fossum similarity [[27]](#ref27)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Fossum} =\n", + "\\frac{POP \\times (TP-\\frac{1}{2})^2}{(TP+FP)\\times(TP+FN)}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Fossum)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gilbert & Wells" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Gilbert & Wells similarity [[28]](#ref28)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{GilbertWells} =\n", + "ln \\frac{POP^3}{2\\pi (TP+FP)\\times(TP+FN)\\times(FP+TN)\\times(FN+TN)} +\n", + "2ln \\frac{POP! \\times TP! \\times FP! \\times FN! \\times TN!}{(TP+FP)! \\times (TP+FN)! \\times (FP+TN)! 
\\times (FN+TN)!}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.GilbertWells)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Goodall" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Goodall similarity [[29]](#ref29) [[30]](#ref30)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Goodall} =\\frac{2}{\\pi} \\sin^{-1}\\Big(\n", + "\\sqrt{\\frac{TP + TN}{POP}}\n", + "\\Big)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Goodall)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Goodman & Kruskal's Lambda" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Goodman & Kruskal's Lambda similarity [[31]](#ref31)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{GK_\\lambda} =\n", + "\\frac{\\frac{1}{2}((max(TP,FP)+max(FN,TN)+max(TP,FN)+max(FP,TN))-\n", + "(max(TP+FP,FN+TN)+max(TP+FN,FP+TN)))}\n", + "{POP-\\frac{1}{2}(max(TP+FP,FN+TN)+max(TP+FN,FP+TN))}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.GoodmanKruskalLambda)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Goodman & Kruskal Lambda-r" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Goodman & Kruskal Lambda-r correlation [[31]](#ref31)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$corr_{GK_{\\lambda_r}} =\n", + "\\frac{TP + TN - \\frac{1}{2}(max(TP+FP,FN+TN)+max(TP+FN,FP+TN))}\n", + "{POP - \\frac{1}{2}(max(TP+FP,FN+TN)+max(TP+FN,FP+TN))}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.GoodmanKruskalLambdaR)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Guttman's Lambda A" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Guttman's Lambda A similarity [[32]](#ref32)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Guttman_{\\lambda_a}} =\n", + "\\frac{max(TP, FN) + max(FP, TN) - max(TP+FP, FN+TN)}{POP - max(TP+FP, FN+TN)}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.GuttmanLambdaA)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Guttman's Lambda B" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Guttman's Lambda B similarity [[32]](#ref32)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{Guttman_{\\lambda_b}} =\n", + "\\frac{max(TP, FP) + max(FN, TN) - max(TP+FN, FP+TN)}{POP - max(TP+FN, FP+TN)}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.GuttmanLambdaB)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hamann" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hamann correlation [[33]](#ref33)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$corr_{Hamann} =\n", + "\\frac{TP+TN-FP-FN}{POP}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.Hamann)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Harris & Lahey" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Harris & Lahey similarity [[34]](#ref34)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{HarrisLahey} =\n", + "\\frac{TP}{TP+FP+FN} \\times \\frac{2TN+FP+FN}{2POP}+\n", + "\\frac{TN}{TN+FP+FN} \\times \\frac{2TP+FP+FN}{2POP}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.HarrisLahey)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hawkins & Dotson" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hawkins & Dotson similarity [[35]](#ref35)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{HawkinsDotson} =\n", + "\\frac{1}{2} \\times \\Big(\\frac{TP}{TP+FP+FN}+\\frac{TN}{FP+FN+TN}\\Big)\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.HawkinsDotson)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kendall's Tau" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Kendall's Tau correlation [[36]](#ref36)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$corr_{KendallTau} =\n", + "\\frac{2 \\times (TP+TN-FP-FN)}{POP \\times (POP-1)}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.KendallTau)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kent & Foster I" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Kent & Foster I similarity [[37]](#ref37)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{KentFosterI} =\n", + "\\frac{TP-\\frac{(TP+FP)\\times(TP+FN)}{TP+FP+FN}}{TP-\\frac{(TP+FP)\\times(TP+FN)}{TP+FP+FN}+FP+FN}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.KentFosterI)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kent & Foster II" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Kent & Foster II similarity [[37]](#ref37)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$sim_{KentFosterII} =\n", + "\\frac{TN-\\frac{(FP+TN)\\times(FN+TN)}{FP+FN+TN}}{TN-\\frac{(FP+TN)\\times(FN+TN)}{FP+FN+TN}+FP+FN}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.distance(metric=DistanceType.KentFosterII)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
1- C. C. Little, \"Abydos Documentation,\" 2018.
\n", + "\n", + "
2- V. Dallmeier, C. Lindig, and A. Zeller, \"Lightweight defect localization for Java,\" in European conference on object-oriented programming, 2005: Springer, pp. 528-550.
\n", + "\n", + "
3- R. Abreu, P. Zoeteweij, and A. J. Van Gemund, \"An evaluation of similarity coefficients for software fault localization,\" in 2006 12th Pacific Rim International Symposium on Dependable Computing (PRDC'06), 2006: IEEE, pp. 39-46.
\n", + "\n", + "
4- M. R. Anderberg, Cluster analysis for applications: probability and mathematical statistics: a series of monographs and textbooks. Academic press, 2014.
\n", + "\n", + "
5- A. M. Andrés and P. F. Marzo, \"Delta: A new measure of agreement between two raters,\" British journal of mathematical and statistical psychology, vol. 57, no. 1, pp. 1-19, 2004.
\n", + "\n", + "
6- C. Baroni-Urbani and M. W. Buser, \"Similarity of binary data,\" Systematic Zoology, vol. 25, no. 3, pp. 251-259, 1976.
\n", + "\n", + "
7- V. Batagelj and M. Bren, \"Comparing resemblance measures,\" Journal of classification, vol. 12, no. 1, pp. 73-90, 1995.
\n", + "\n", + "
8- F. B. Baulieu, \"A classification of presence/absence based dissimilarity coefficients,\" Journal of Classification, vol. 6, no. 1, pp. 233-246, 1989.
\n", + "\n", + "
9- F. B. Baulieu, \"Two variant axiom systems for presence/absence based dissimilarity coefficients,\" Journal of Classification, vol. 14, no. 1, pp. 0159-0170, 1997.
\n", + "\n", + "
10- R. Benini, Principii di demografia. Barbera, 1901.
\n", + "\n", + "
11- G. N. Lance and W. T. Williams, \"Computer programs for hierarchical polythetic classification (“similarity analyses”),\" The Computer Journal, vol. 9, no. 1, pp. 60-64, 1966.
\n", + "\n", + "
12- G. N. Lance and W. T. Williams, \"Mixed-Data Classificatory Programs I - Agglomerative Systems,\" Australian Computer Journal, vol. 1, no. 1, pp. 15-20, 1967.
\n", + "\n", + "
13- P. W. Clement, \"A formula for computing inter-observer agreement,\" Psychological Reports, vol. 39, no. 1, pp. 257-258, 1976.
\n", + "\n", + "
14- V. Consonni and R. Todeschini, \"New similarity coefficients for binary data,\" Match-Communications in Mathematical and Computer Chemistry, vol. 68, no. 2, p. 581, 2012.
\n", + "\n", + "
15- S. F. Dennis, \"The Construction of a Thesaurus Automatically From a Sample of Text,\" in Statistical Association Methods for Mechanized Documentation: Symposium Proceedings, 1965, vol. 269: US Government Printing Office, p. 61.
\n", + "\n", + "
16- P. G. Digby, \"Approximating the tetrachoric correlation coefficient,\" Biometrics, pp. 753-757, 1983.
\n", + "\n", + "
17- IBM Corp, \"IBM SPSS Statistics Algorithms,\" ed: IBM Corp Armonk, NY, USA, 2017.
\n", + "\n", + "
18- M. H. Doolittle, \"The verification of predictions,\" Bulletin of the Philosophical Society of Washington, vol. 7, pp. 122-127, 1885.
\n", + "\n", + "
19- H. Eyraud, \"Les principes de la mesure des correlations,\" Ann. Univ. Lyon, III. Ser., Sect. A, vol. 1, no. 30-47, p. 111, 1936.
\n", + "\n", + "
20- E. W. Fager, \"Determination and analysis of recurrent groups,\" Ecology, vol. 38, no. 4, pp. 586-595, 1957.
\n", + "\n", + "
21- E. W. Fager and J. A. McGowan, \"Zooplankton Species Groups in the North Pacific: Co-occurrences of species can be used to derive groups whose members react similarly to water-mass types,\" Science, vol. 140, no. 3566, pp. 453-460, 1963.
\n", + "\n", + "
22- D. P. Faith, \"Asymmetric binary similarity measures,\" Oecologia, vol. 57, pp. 287-290, 1983.
\n", + "\n", + "
23- J. L. Fleiss, B. Levin, and M. C. Paik, Statistical methods for rates and proportions. John Wiley & Sons, 2013.
\n", + "\n", + "
24- S. A. Forbes, On the local distribution of certain Illinois fishes: an essay in statistical ecology. Illinois State Laboratory of Natural History, 1907.
\n", + "\n", + "
25- A. Mozley, \"The statistical analysis of the distribution of pond molluscs in western Canada,\" The American Naturalist, vol. 70, no. 728, pp. 237-244, 1936.
\n", + "\n", + "
26- S. A. Forbes, \"Method of determining and measuring the associative relations of species,\" Science, vol. 61, no. 1585, pp. 518-524, 1925.
\n", + "\n", + "
27- E. G. Fossum and G. Kaskey, \"Optimization and standardization of information retrieval language and systems,\" SPERRY RAND CORP PHILADELPHIA PA UNIVAC DIV, 1966.
\n", + "\n", + "
28- N. Gilbert and T. C. Wells, \"Analysis of quadrat data,\" The Journal of Ecology, pp. 675-685, 1966.
\n", + "\n", + "
29- D. W. Goodall, \"The distribution of the matching coefficient,\" Biometrics, pp. 647-656, 1967.
\n", + "\n", + "
30- B. Austin and R. R. Colwell, \"Evaluation of some coefficients for use in numerical taxonomy of microorganisms,\" International Journal of Systematic and Evolutionary Microbiology, vol. 27, no. 3, pp. 204-210, 1977.
\n", + "\n", + "
31- L. A. Goodman and W. H. Kruskal, Measures of association for cross classifications. Springer, 1979.
\n", + "\n", + "
32- L. Guttman, \"An outline of the statistical theory of prediction,\" The prediction of personal adjustment, vol. 48, pp. 253-318, 1941.
\n", + "\n", + "
33- U. Hamann, \"Merkmalsbestand und Verwandtschaftsbeziehungen der Farinosae: ein Beitrag zum System der Monokotyledonen,\" Willdenowia, pp. 639-768, 1961.
\n", + "\n", + "
34- F. C. Harris and B. B. Lahey, \"A method for combining occurrence and nonoccurrence interobserver agreement scores,\" Journal of Applied Behavior Analysis, vol. 11, no. 4, pp. 523-527, 1978.
\n", + "\n", + "
35- R. P. Hawkins and V. A. Dotson, \"Reliability Scores That Delude: An Alice in Wonderland Trip Through the Misleading Characteristics of Inter-Observer Agreement Scores in Interval Recording,\" 1973.
\n", + "\n", + "
36- M. G. Kendall, \"A new measure of rank correlation,\" Biometrika, vol. 30, no. 1/2, pp. 81-93, 1938.
\n", + "\n", + "
37- R. N. Kent and S. L. Foster, \"Direct observational procedures: Methodological issues in naturalistic settings,\" Handbook of behavioral assessment, pp. 279-328, 1977.
" ] } ], diff --git a/Test/verified_test.py b/Test/verified_test.py index ffb959d9..44e72f8a 100644 --- a/Test/verified_test.py +++ b/Test/verified_test.py @@ -549,5 +549,97 @@ 0.48072545510682463 >>> cm2.distance(metric=DistanceType.ConsonniTodeschiniV)[1] 0.4003930264973547 +>>> cm1.distance(metric=DistanceType.Dennis)[1] +13.857142857142858 +>>> cm2.distance(metric=DistanceType.Dennis)[1] +10.028539207654113 +>>> cm1.distance(metric=DistanceType.Digby)[1] +0.9774244829419212 +>>> cm2.distance(metric=DistanceType.Digby)[1] +0.9491281473458171 +>>> cm1.distance(metric=DistanceType.Dispersion)[1] +0.002524989587671803 +>>> cm2.distance(metric=DistanceType.Dispersion)[1] +0.002502212619741774 +>>> cm1.distance(metric=DistanceType.Doolittle)[1] +0.24744247205785666 +>>> cm2.distance(metric=DistanceType.Doolittle)[1] +0.13009912077202224 +>>> cm1.distance(metric=DistanceType.Eyraud)[1] +-1.438198553583169e-06 +>>> cm2.distance(metric=DistanceType.Eyraud)[1] +-1.5399964580081465e-06 +>>> cm1.distance(metric=DistanceType.FagerMcGowan)[1] +0.25 +>>> cm2.distance(metric=DistanceType.FagerMcGowan)[1] +0.16102422643817918 +>>> cm1.distance(metric=DistanceType.Faith)[1] +0.4987244897959184 +>>> cm2.distance(metric=DistanceType.Faith)[1] +0.4968112244897959 +>>> cm1.distance(metric=DistanceType.FleissLevinPaik)[1] +0.9974358974358974 +>>> cm2.distance(metric=DistanceType.FleissLevinPaik)[1] +0.9955041746949261 +>>> cm1.distance(metric=DistanceType.ForbesI)[1] +98.0 +>>> cm2.distance(metric=DistanceType.ForbesI)[1] +52.266666666666666 +>>> cm1.distance(metric=DistanceType.ForbesII)[1] +0.49743589743589745 +>>> cm2.distance(metric=DistanceType.ForbesII)[1] +0.3953727506426735 +>>> cm1.distance(metric=DistanceType.Fossum)[1] +110.25 +>>> cm2.distance(metric=DistanceType.Fossum)[1] +58.8 +>>> cm1.distance(metric=DistanceType.GilbertWells)[1] +20.176174477346354 +>>> cm2.distance(metric=DistanceType.GilbertWells)[1] +16.717742356979358 +>>> 
abs(cm1.distance(metric=DistanceType.Goodall)[1] - 0.9544884026871964) < 1e-15 +True +>>> abs(cm2.distance(metric=DistanceType.Goodall)[1] - 0.9397552079794624) < 1e-15 +True +>>> cm1.distance(metric=DistanceType.GoodmanKruskalLambda)[1] +0.0 +>>> cm2.distance(metric=DistanceType.GoodmanKruskalLambda)[1] +0.0 +>>> cm1.distance(metric=DistanceType.GoodmanKruskalLambdaR)[1] +0.0 +>>> cm2.distance(metric=DistanceType.GoodmanKruskalLambdaR)[1] +-0.2727272727272727 +>>> cm1.distance(metric=DistanceType.GuttmanLambdaA)[1] +0.0 +>>> cm2.distance(metric=DistanceType.GuttmanLambdaA)[1] +0.0 +>>> cm1.distance(metric=DistanceType.GuttmanLambdaB)[1] +0.0 +>>> cm2.distance(metric=DistanceType.GuttmanLambdaB)[1] +0.0 +>>> cm1.distance(metric=DistanceType.Hamann)[1] +0.9897959183673469 +>>> cm2.distance(metric=DistanceType.Hamann)[1] +0.9821428571428571 +>>> cm1.distance(metric=DistanceType.HarrisLahey)[1] +0.3367085964820711 +>>> cm2.distance(metric=DistanceType.HarrisLahey)[1] +0.22761577457069784 +>>> cm1.distance(metric=DistanceType.HawkinsDotson)[1] +0.6641091219096334 +>>> cm2.distance(metric=DistanceType.HawkinsDotson)[1] +0.606635407786303 +>>> cm1.distance(metric=DistanceType.KendallTau)[1] +0.0025282143508744493 +>>> cm2.distance(metric=DistanceType.KendallTau)[1] +0.00250866630176975 +>>> cm1.distance(metric=DistanceType.KentFosterI)[1] +-0.19999999999999996 +>>> cm2.distance(metric=DistanceType.KentFosterI)[1] +-0.23529411764705888 +>>> cm1.distance(metric=DistanceType.KentFosterII)[1] +-0.0012804097311239404 +>>> cm2.distance(metric=DistanceType.KentFosterII)[1] +-0.002196997436837158 """ diff --git a/pycm/pycm_compare.py b/pycm/pycm_compare.py index 3b0f7284..4df3e5ba 100644 --- a/pycm/pycm_compare.py +++ b/pycm/pycm_compare.py @@ -314,7 +314,8 @@ def __compare_assign_handler__( compare.classes = list(cm_dict.values())[0].classes compare.class_weight = {k: 1 for k in compare.classes} compare.class_benchmark_weight = {k: 1 for k in CLASS_BENCHMARK_LIST} - 
def Dennis_calc(TP, FP, FN, TN):
    """
    Calculate Dennis similarity.

    Formula: (TP - m) / sqrt(m), where m = (TP + FP) * (TP + FN) / n.

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Dennis similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        scaled_margins = ((TP + FP) * (TP + FN)) / n
        return (TP - scaled_margins) / math.sqrt(scaled_margins)
    except Exception:
        return "None"


def Digby_calc(TP, FP, FN, TN):
    """
    Calculate Digby correlation.

    Formula: (x - y) / (x + y), where x = (TP * TN) ** 0.75 and
    y = (FP * FN) ** 0.75.

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Digby correlation as float, "None" if it cannot be computed
    """
    try:
        agree_term = (TP * TN) ** 0.75
        disagree_term = (FP * FN) ** 0.75
        return (agree_term - disagree_term) / (agree_term + disagree_term)
    except Exception:
        return "None"


def Dispersion_calc(TP, FP, FN, TN):
    """
    Calculate Dispersion correlation.

    Formula: (TP * TN - FP * FN) / n ** 2.

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Dispersion correlation as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        agree_product = TP * TN
        disagree_product = FP * FN
        return (agree_product - disagree_product) / (n ** 2)
    except Exception:
        return "None"


def Doolittle_calc(TP, FP, FN, TN):
    """
    Calculate Doolittle similarity.

    Formula: (TP * n - p) ** 2 / (p * q), where p = (TP + FP) * (TP + FN)
    and q = (TN + FP) * (TN + FN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Doolittle similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        positive_margins = (TP + FP) * (TP + FN)
        negative_margins = (TN + FP) * (TN + FN)
        return ((TP * n - positive_margins) ** 2) / \
            (positive_margins * negative_margins)
    except Exception:
        return "None"


def Eyraud_calc(TP, FP, FN, TN):
    """
    Calculate Eyraud similarity.

    Formula: (TP - p) / (p * q), where p = (TP + FP) * (TP + FN) and
    q = (TN + FP) * (TN + FN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Eyraud similarity as float, "None" if it cannot be computed
    """
    try:
        positive_margins = (TP + FP) * (TP + FN)
        negative_margins = (TN + FP) * (TN + FN)
        return (TP - positive_margins) / (positive_margins * negative_margins)
    except Exception:
        return "None"


def FagerMcGowan_calc(TP, FP, FN, TN):
    """
    Calculate Fager & McGowan similarity.

    Formula: TP / sqrt((TP + FP) * (TP + FN))
    - 1 / (2 * sqrt(max(TP + FP, TP + FN))).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Fager & McGowan similarity as float, "None" if it cannot be computed
    """
    try:
        root_margins = math.sqrt((TP + FP) * (TP + FN))
        root_larger_margin = math.sqrt(max((TP + FP), (TP + FN)))
        return (TP / root_margins) - (1 / (2 * root_larger_margin))
    except Exception:
        return "None"


def Faith_calc(TP, FP, FN, TN):
    """
    Calculate Faith similarity.

    Formula: (TP + TN / 2) / n.

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Faith similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        return (TP + (TN / 2)) / n
    except Exception:
        return "None"


def FleissLevinPaik_calc(TP, FP, FN, TN):
    """
    Calculate Fleiss-Levin-Paik similarity.

    Formula: 2 * TN / (2 * TN + FP + FN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Fleiss-Levin-Paik similarity as float, "None" if it cannot be computed
    """
    try:
        double_tn = 2 * TN
        return double_tn / (double_tn + FP + FN)
    except Exception:
        return "None"


def ForbesI_calc(TP, FP, FN, TN):
    """
    Calculate Forbes I similarity.

    Formula: n * TP / ((TP + FP) * (TP + FN)).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Forbes I similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        positive_margins = (TP + FP) * (TP + FN)
        return (n * TP) / positive_margins
    except Exception:
        return "None"


def ForbesII_calc(TP, FP, FN, TN):
    """
    Calculate Forbes II correlation.

    Formula: (FP * FN - TP * TN) / (p - n * min(TP + FP, TP + FN)),
    where p = (TP + FP) * (TP + FN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Forbes II correlation as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        numerator = (FP * FN) - (TP * TN)
        positive_margins = (TP + FP) * (TP + FN)
        smaller_margin = min((TP + FP), (TP + FN))
        return numerator / (positive_margins - (n * smaller_margin))
    except Exception:
        return "None"


def Fossum_calc(TP, FP, FN, TN):
    """
    Calculate Fossum similarity.

    Formula: n * (TP - 0.5) ** 2 / ((TP + FP) * (TP + FN)).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Fossum similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        shifted_tp_squared = (TP - 0.5) ** 2
        positive_margins = (TP + FP) * (TP + FN)
        return (n * shifted_tp_squared) / positive_margins
    except Exception:
        return "None"


def GilbertWells_calc(TP, FP, FN, TN):
    """
    Calculate Gilbert & Wells similarity.

    Formula: log(n ** 3 / (2 * pi * m)) + 2 * log(c / f), where
    m = (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN),
    c = n! * TP! * FP! * FN! * TN! and
    f = (TP + FP)! * (TP + FN)! * (TN + FP)! * (TN + FN)!.

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Gilbert & Wells similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        margin_product = (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)
        margin_factorials = math.factorial(TP + FP) * math.factorial(TP + FN) * \
            math.factorial(TN + FP) * math.factorial(TN + FN)
        cell_factorials = math.factorial(n) * math.factorial(TP) * \
            math.factorial(FP) * math.factorial(FN) * math.factorial(TN)
        try:
            log_ratio = math.log(cell_factorials / margin_factorials)
        except OverflowError:
            # Both operands are exact integers, and their quotient can exceed
            # the float range for larger matrices (e.g. TP=TN=600, FP=FN=0).
            # math.log accepts arbitrarily large ints, so take the logarithms
            # separately instead of giving up on a well-defined value.
            log_ratio = math.log(cell_factorials) - \
                math.log(margin_factorials)
        return math.log((n ** 3) / (2 * math.pi * margin_product)) + \
            2 * log_ratio
    except Exception:
        return "None"


def Goodall_calc(TP, FP, FN, TN):
    """
    Calculate Goodall similarity.

    Formula: (2 / pi) * asin(sqrt((TP + TN) / n)).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Goodall similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        root_agreement = math.sqrt((TP + TN) / n)
        return (2 / math.pi) * math.asin(root_agreement)
    except Exception:
        return "None"


def GoodmanKruskalLambda_calc(TP, FP, FN, TN):
    """
    Calculate Goodman & Kruskal's Lambda similarity.

    Formula: 0.5 * (s - t) / (n - 0.5 * t), where
    s = max(TP, FP) + max(FN, TN) + max(TP, FN) + max(FP, TN) and
    t = max(TP + FP, FN + TN) + max(TP + FN, FP + TN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Goodman & Kruskal's Lambda similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        cell_maxima = max(TP, FP) + max(FN, TN) + max(TP, FN) + max(FP, TN)
        margin_maxima = max(TP + FP, FN + TN) + max(TP + FN, FP + TN)
        return (0.5 * (cell_maxima - margin_maxima)) / \
            (n - 0.5 * margin_maxima)
    except Exception:
        return "None"


def GoodmanKruskalLambdaR_calc(TP, FP, FN, TN):
    """
    Calculate Goodman & Kruskal Lambda-r correlation.

    Formula: (TP + TN - h) / (n - h), where
    h = 0.5 * (max(TP + FP, FN + TN) + max(TP + FN, FP + TN)).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Goodman & Kruskal Lambda-r correlation as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        half_margin_maxima = 0.5 * \
            (max(TP + FP, FN + TN) + max(TP + FN, FP + TN))
        return (TP + TN - half_margin_maxima) / (n - half_margin_maxima)
    except Exception:
        return "None"


def GuttmanLambdaA_calc(TP, FP, FN, TN):
    """
    Calculate Guttman's Lambda A similarity.

    Formula: (s - t) / (n - t), where s = max(TP, FN) + max(FP, TN) and
    t = max(TP + FP, FN + TN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Guttman's Lambda A similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        cell_maxima = max(TP, FN) + max(FP, TN)
        margin_maximum = max(TP + FP, FN + TN)
        return (cell_maxima - margin_maximum) / (n - margin_maximum)
    except Exception:
        return "None"


def GuttmanLambdaB_calc(TP, FP, FN, TN):
    """
    Calculate Guttman's Lambda B similarity.

    Formula: (s - t) / (n - t), where s = max(TP, FP) + max(FN, TN) and
    t = max(TP + FN, FP + TN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Guttman's Lambda B similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        cell_maxima = max(TP, FP) + max(FN, TN)
        margin_maximum = max(TP + FN, FP + TN)
        return (cell_maxima - margin_maximum) / (n - margin_maximum)
    except Exception:
        return "None"


def Hamann_calc(TP, FP, FN, TN):
    """
    Calculate Hamann correlation.

    Formula: (TP + TN - FP - FN) / n.

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Hamann correlation as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        return (TP + TN - FP - FN) / n
    except Exception:
        return "None"


def HarrisLahey_calc(TP, FP, FN, TN):
    """
    Calculate Harris & Lahey similarity.

    Formula: TP / (TP + FP + FN) * (2 * TN + FP + FN) / (2 * n)
    + TN / (TN + FP + FN) * (2 * TP + FP + FN) / (2 * n).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Harris & Lahey similarity as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        positive_ratio = TP / (TP + FP + FN)
        positive_weight = (2 * TN + FP + FN) / (2 * n)
        negative_ratio = TN / (TN + FP + FN)
        negative_weight = (2 * TP + FP + FN) / (2 * n)
        return positive_ratio * positive_weight + \
            negative_ratio * negative_weight
    except Exception:
        return "None"


def HawkinsDotson_calc(TP, FP, FN, TN):
    """
    Calculate Hawkins & Dotson similarity.

    Formula: 0.5 * (TP / (TP + FP + FN) + TN / (TN + FN + FP)).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Hawkins & Dotson similarity as float, "None" if it cannot be computed
    """
    try:
        return 0.5 * ((TP / (TP + FP + FN)) + (TN / (TN + FN + FP)))
    except Exception:
        return "None"


def KendallTau_calc(TP, FP, FN, TN):
    """
    Calculate Kendall's Tau correlation.

    Formula: 2 * (TP + TN - FP - FN) / (n * (n - 1)).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Kendall's Tau correlation as float, "None" if it cannot be computed
    """
    try:
        n = TP + FP + FN + TN
        return (2 * (TP + TN - FP - FN)) / (n * (n - 1))
    except Exception:
        return "None"


def KentFosterI_calc(TP, FP, FN, TN):
    """
    Calculate Kent & Foster I similarity.

    Formula: (TP - m) / (TP - m + FP + FN), where
    m = (TP + FP) * (TP + FN) / (TP + FP + FN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Kent & Foster I similarity as float, "None" if it cannot be computed
    """
    try:
        scaled_margins = ((TP + FP) * (TP + FN)) / (TP + FP + FN)
        return (TP - scaled_margins) / (TP - scaled_margins + FP + FN)
    except Exception:
        return "None"


def KentFosterII_calc(TP, FP, FN, TN):
    """
    Calculate Kent & Foster II similarity.

    Formula: (TN - m) / (TN - m + FP + FN), where
    m = (TN + FP) * (TN + FN) / (TN + FP + FN).

    :param TP: true positive
    :type TP: int
    :param FP: false positive
    :type FP: int
    :param FN: false negative
    :type FN: int
    :param TN: true negative
    :type TN: int
    :return: Kent & Foster II similarity as float, "None" if it cannot be computed
    """
    try:
        scaled_margins = ((TN + FP) * (TN + FN)) / (TN + FP + FN)
        return (TN - scaled_margins) / (TN - scaled_margins + FP + FN)
    except Exception:
        return "None"
def char_num_transformer(input_item):
    """
    Transform the input string to a proper key for char-num sorting.

    The string is split around its first run of digits into
    (text before, integer value of the digits, text after). A string
    without digits yields (string, False, ""), so every position of the
    key stays comparable with the keys of strings that do contain digits.

    :param input_item: input item
    :type input_item: str
    :return: key as a single-element list holding a 3-item tuple
    """
    first_number = re.search(r'\d+', input_item)
    if first_number is None:
        # "" (not False) in the last slot: a bool cannot be ordered against
        # the str suffix of another key when prefix and number tie
        # (e.g. "a" vs "a0"), which used to raise TypeError while sorting.
        return [(input_item, False, "")]
    return [(input_item[:first_number.start()],
             int(first_number.group()),
             input_item[first_number.end():])]


def sort_char_num(input_list):
    """
    Sort a list of strings first alphabetically and then numerically.

    Items are ordered by the key produced by char_num_transformer, i.e. by
    the text before the first number, then by that number's value, then by
    the remaining text.

    :param input_list: input list
    :type input_list: iterable
    :return: a sorted list of strings
    """
    return sorted(input_list, key=char_num_transformer)