From ca2b4edc38d3eeda9c5e1156875b2d4938496595 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Mon, 16 Oct 2023 11:30:55 -0400
Subject: [PATCH 1/7] Add github action to codespell main on push and PRs

---
 .github/workflows/codespell.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 .github/workflows/codespell.yml

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 000000000..3ebbf5504
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

From 4886fda27508f097278adecea945a3462f801e75 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Mon, 16 Oct 2023 11:30:55 -0400
Subject: [PATCH 2/7] Add rudimentary codespell config

---
 pyproject.toml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 7f6aebc0c..028f97461 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,3 +104,9 @@ reverse_relative = true
 [build-system]
 requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
 build-backend = "poetry_dynamic_versioning.backend"
+
+[tool.codespell]
+skip = '.git,*.pdf,*.svg,poetry.lock,output,*.tsv'
+# some specific phrases, variables and mixed case (CamelCase etc)
+ignore-regex = '\b(Torsades de pointes|[A-Z][a-zA-Z]*|[a-z]+[A-Z][a-zA-Z]*)\b'
+ignore-words-list = 'langual,sting,infarction,holliday,cyclin,convertor,ser,collapsin,infarctions'

From b3ae901a64f6b817d4704c43f02e751d79081e1d Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 6 Dec 2024 19:36:30 -0500
Subject: [PATCH 3/7] Do not ignore the tests/input/training folder, which is
 already committed to git

Ignoring it complicates committing modifications to the files it contains.
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 0ed907210..98a45a25d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,6 +141,7 @@ tests/output/eval-*
 
 tasks/
 training/
+!tests/input/training/
 preserved/
 random
 LOG

From 6a03cc749421d516ae6e7a6cb1b3ad99bebc3a3a Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 6 Dec 2024 19:39:37 -0500
Subject: [PATCH 4/7] More codespell config fixups (ignore-regex, ignore-words-list)

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 028f97461..8ba6afb2f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,5 +108,5 @@ build-backend = "poetry_dynamic_versioning.backend"
 [tool.codespell]
 skip = '.git,*.pdf,*.svg,poetry.lock,output,*.tsv'
 # some specific phrases, variables and mixed case (CamelCase etc)
-ignore-regex = '\b(Torsades de pointes|[A-Z][a-zA-Z]*|[a-z]+[A-Z][a-zA-Z]*)\b'
-ignore-words-list = 'langual,sting,infarction,holliday,cyclin,convertor,ser,collapsin,infarctions'
+ignore-regex = '\b(Torsades de pointes|[A-Z][a-zA-Z]*|[a-z]+[A-Z][a-zA-Z]*|de pointes)\b|\bcommments:'
+ignore-words-list = 'langual,sting,infarction,holliday,cyclin,convertor,ser,collapsin,infarctions,euclidian,dependant'

From 2588571e3905c490a1acd8c455e18dd1d018f07c Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Wed, 18 Dec 2024 08:53:25 -0500
Subject: [PATCH 5/7] Ignore old and go-nucleus.json

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8ba6afb2f..83d2a4303 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -106,7 +106,7 @@ requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
 build-backend = "poetry_dynamic_versioning.backend"
 
 [tool.codespell]
-skip = '.git,*.pdf,*.svg,poetry.lock,output,*.tsv'
+skip = '.git,*.pdf,*.svg,poetry.lock,output,*.tsv,./tests/input,old'
 # some specific phrases, variables and mixed case (CamelCase etc)
 ignore-regex = '\b(Torsades de pointes|[A-Z][a-zA-Z]*|[a-z]+[A-Z][a-zA-Z]*|de pointes)\b|\bcommments:'
 ignore-words-list = 'langual,sting,infarction,holliday,cyclin,convertor,ser,collapsin,infarctions,euclidian,dependant'

From 77bca8e6efad059dd654979e152eea07caf5e592 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Wed, 18 Dec 2024 08:54:11 -0500
Subject: [PATCH 6/7] [DATALAD RUNCMD] run codespell throughout fixing few left
 typos automagically

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 Makefile                                             | 2 +-
 docs/custom.md                                       | 2 +-
 docs/functions.md                                    | 2 +-
 docs/troubleshooting.md                              | 2 +-
 notebooks/BioEPIC_demo.ipynb                         | 4 ++--
 src/ontogpt/cli.py                                   | 4 ++--
 src/ontogpt/clients/pubmed_client.py                 | 2 +-
 src/ontogpt/evaluation/drugmechdb/eval_drugmechdb.py | 2 +-
 src/ontogpt/templates/dietitian_notes.yaml           | 2 +-
 src/ontogpt/templates/ecosim_methods.py              | 2 +-
 src/ontogpt/templates/ecosim_methods.yaml            | 2 +-
 src/ontogpt/templates/pathology.py                   | 2 +-
 src/ontogpt/templates/pathology.yaml                 | 2 +-
 src/ontogpt/templates/recipe.yaml                    | 2 +-
 src/ontogpt/utils/pymupdf_helpers.py                 | 2 +-
 15 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index 8a1a1afcb..347a6cccb 100644
--- a/Makefile
+++ b/Makefile
@@ -57,7 +57,7 @@ gh-deploy:
 all_recipes: tests/output/owl/merged/recipe-all-merged.owl
 
 # prefix with 'web' for a URL in recipe-urls.csv
-# prefix wiyth 'case' for a previously downloaded recipe in cases/ directory
+# prefix with 'case' for a previously downloaded recipe in cases/ directory
 RECIPES = case-spaghetti case-egg-noodles case-tortilla-soup \
  web-spinach-and-feta-turkey-burgers \
  web-shrimp-and-cheesy-grits-with-bacon \
diff --git a/docs/custom.md b/docs/custom.md
index 42b2f6790..bc2ef3340 100644
--- a/docs/custom.md
+++ b/docs/custom.md
@@ -469,4 +469,4 @@ For example, if your schema is named `albatross.yaml`, then an extract command i
 ontogpt extract -t albatross.yaml -i input.txt
 ```
 
-Running this (or any other command including your custom schema) will install it for future use with OntoGPT, so in subsquent commands it can be referred to by its name (e.g., `albatross`, without the file extension or a full filepath).
+Running this (or any other command including your custom schema) will install it for future use with OntoGPT, so in subsequent commands it can be referred to by its name (e.g., `albatross`, without the file extension or a full filepath).
diff --git a/docs/functions.md b/docs/functions.md
index cadec94e6..536d5c9a6 100644
--- a/docs/functions.md
+++ b/docs/functions.md
@@ -194,7 +194,7 @@ Including an instruction like the following anecdotally helps to avoid parsing f
 
 ### selectcols
 
-Use the option `selectcols` to specify exact colums to use when parsing tabular files as input.
+Use the option `selectcols` to specify exact columns to use when parsing tabular files as input.
 
 Example:
 
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 473a335e5..f5eb4094b 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -20,7 +20,7 @@ OntoGPT uses `oaklib` to handle the ontologies it uses as annotators, and `oakli
 
 To change the download location, set the `PYSTOW_HOME` variable in your environment to your preferred path.
 
-For example, to save downloads to `/tmp/oaklib`, set the varible like this:
+For example, to save downloads to `/tmp/oaklib`, set the variable like this:
 
 ```bash
 export PYSTOW_HOME='/tmp/'
diff --git a/notebooks/BioEPIC_demo.ipynb b/notebooks/BioEPIC_demo.ipynb
index 7780d5358..d6288f4a6 100644
--- a/notebooks/BioEPIC_demo.ipynb
+++ b/notebooks/BioEPIC_demo.ipynb
@@ -18,7 +18,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The following examples demonstrate basic functionality of OntoGPT and the SPIRES method for extracting and integrating data (i.e., concepts and relationships) from texts in the envrionmental and earth science domains.\n",
+    "The following examples demonstrate basic functionality of OntoGPT and the SPIRES method for extracting and integrating data (i.e., concepts and relationships) from texts in the environmental and earth science domains.\n",
     "These examples assume use of the LBNL CBORG computing resource."
    ]
   },
@@ -225,7 +225,7 @@
     "          A semicolon-separated list of variables measured in\n",
     "          environmental and earth science research. Examples\n",
     "          include: root shape, biomass, water turbidity\n",
-    "      equipments:\n",
+    "      equipment:\n",
     "        range: Equipment\n",
     "        description: >-\n",
     "          A semicolon-separated list of equipment used in\n",
diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py
index 13dc83a52..c6705656e 100644
--- a/src/ontogpt/cli.py
+++ b/src/ontogpt/cli.py
@@ -1208,7 +1208,7 @@ def synonyms(
 
     ontogpt synonyms -m ollama/llama3 --context "political" "abdicate"
 
-    ontogpt synonyms -m ollama/llama3 --context "biological" "dessicate"
+    ontogpt synonyms -m ollama/llama3 --context "biological" "desiccate"
 
     """
     logging.info(f"Creating for {term}")
@@ -2207,7 +2207,7 @@ def list_models():
 
     Max Tokens: Token limit for the model. Note that models may
     tokenize text differently and calculate input and/or output tokens
-    in particular ways, so consult a model's original documentaion for
+    in particular ways, so consult a model's original documentation for
     further details.
     """
     models = get_model_cost_map("")
diff --git a/src/ontogpt/clients/pubmed_client.py b/src/ontogpt/clients/pubmed_client.py
index 21e48e89f..251700726 100644
--- a/src/ontogpt/clients/pubmed_client.py
+++ b/src/ontogpt/clients/pubmed_client.py
@@ -183,7 +183,7 @@ def text(
         :param ids: List of PubMed IDs, or string with single PMID
         :param raw: if True, do not parse the xml, just return the raw output with tags
         :param autoformat: if True include title and abstract concatenated
-        :param pubmedcentral: if True, retreive text from PubMed Central where possible
+        :param pubmedcentral: if True, retrieve text from PubMed Central where possible
         :return: the text of a single entry, or a list of strings for text of multiple entries
         """
         batch_size = 200
diff --git a/src/ontogpt/evaluation/drugmechdb/eval_drugmechdb.py b/src/ontogpt/evaluation/drugmechdb/eval_drugmechdb.py
index 65cc23753..b5fd3a6ab 100644
--- a/src/ontogpt/evaluation/drugmechdb/eval_drugmechdb.py
+++ b/src/ontogpt/evaluation/drugmechdb/eval_drugmechdb.py
@@ -41,7 +41,7 @@ def _fix_source_mechanism(mechanism_dict: dict) -> dict:
     g["id"] = g["_id"]
     del g["_id"]
     # normalize alt_ids
-    bad_fields = ["all_id", "alt_name", "alt-name", "comemt", "comemnt"]
+    bad_fields = ["all_id", "alt_name", "alt-name", "comemt", "comemnt"]  # codespell:ignore
     for n in mechanism_dict["nodes"]:
         if "alt_ids" in n and isinstance(n["alt_ids"], str):
             n["alt_ids"] = [n["alt_ids"]]
diff --git a/src/ontogpt/templates/dietitian_notes.yaml b/src/ontogpt/templates/dietitian_notes.yaml
index a233ffbac..22db3944d 100644
--- a/src/ontogpt/templates/dietitian_notes.yaml
+++ b/src/ontogpt/templates/dietitian_notes.yaml
@@ -294,7 +294,7 @@ classes:
         range: string
 
   # TODO: distinguish whether this is currently active therapy
-  #       or a reccomendation for future therapy (but not yet started)
+  #       or a recommendation for future therapy (but not yet started)
   TherapeuticMaterial:
     description: >-
       A specific material added to a patient's diet or
diff --git a/src/ontogpt/templates/ecosim_methods.py b/src/ontogpt/templates/ecosim_methods.py
index b3280f677..3ee82faab 100644
--- a/src/ontogpt/templates/ecosim_methods.py
+++ b/src/ontogpt/templates/ecosim_methods.py
@@ -198,7 +198,7 @@ class TermSet(NamedEntity):
     locations: Optional[List[str]] = Field(None, description="""A semicolon-separated list of research locations. Examples include: Vermont, New York City, Ethiopia""", json_schema_extra = { "linkml_meta": {'alias': 'locations', 'domain_of': ['TermSet']} })
     methods: Optional[List[str]] = Field(None, description="""A semicolon-separated list of methods used in environmental and earth science research. Examples include: sampling, spectroscopy""", json_schema_extra = { "linkml_meta": {'alias': 'methods', 'domain_of': ['TermSet']} })
     variables: Optional[str] = Field(None, description="""A semicolon-separated list of variables measured in environmental and earth science research. Examples include: root shape, biomass, water turbidity""", json_schema_extra = { "linkml_meta": {'alias': 'variables', 'domain_of': ['TermSet']} })
-    equipments: Optional[str] = Field(None, description="""A semicolon-separated list of equipment used in environmental and earth science research.""", json_schema_extra = { "linkml_meta": {'alias': 'equipments', 'domain_of': ['TermSet']} })
+    equipment: Optional[str] = Field(None, description="""A semicolon-separated list of equipment used in environmental and earth science research.""", json_schema_extra = { "linkml_meta": {'alias': 'equipment', 'domain_of': ['TermSet']} })
     equipment_to_variable_relationships: Optional[List[EquipmentMeasuresVariable]] = Field(None, description="""A semicolon separated list of relationships between specific equipment and variables they are used to measure as described in the input. Example: NMR spectrometer was used to measure chemical content""", json_schema_extra = { "linkml_meta": {'alias': 'equipment_to_variable_relationships', 'domain_of': ['TermSet']} })
     id: str = Field(..., description="""A unique identifier for the named entity""", json_schema_extra = { "linkml_meta": {'alias': 'id',
          'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
diff --git a/src/ontogpt/templates/ecosim_methods.yaml b/src/ontogpt/templates/ecosim_methods.yaml
index 5900af1e7..031f201ab 100644
--- a/src/ontogpt/templates/ecosim_methods.yaml
+++ b/src/ontogpt/templates/ecosim_methods.yaml
@@ -42,7 +42,7 @@ classes:
           A semicolon-separated list of variables measured in
           environmental and earth science research. Examples
           include: root shape, biomass, water turbidity
-      equipments:
+      equipment:
         range: Equipment
         description: >-
           A semicolon-separated list of equipment used in
diff --git a/src/ontogpt/templates/pathology.py b/src/ontogpt/templates/pathology.py
index 71b8a645e..cdb85036d 100644
--- a/src/ontogpt/templates/pathology.py
+++ b/src/ontogpt/templates/pathology.py
@@ -318,7 +318,7 @@ class PathologyReport(ConfiguredBaseModel):
     """
     linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/pathology', 'tree_root': True})
 
-    pathology_statements: Optional[List[PathologyStatement]] = Field(None, description="""A semicolon-delimited list of pathology statements, each describing a pathology, including any diagnoses, one or more specific qualities being measured and the anatomical location or tissue the pathology is measured in. If any of the pathology statements are negative, the negation should be included in each statment, e.g., \"no granulomas or viropathic changes\" should become \"no granulomas\" and \"no viropathic changes\".""", json_schema_extra = { "linkml_meta": {'alias': 'pathology_statements', 'domain_of': ['PathologyReport']} })
+    pathology_statements: Optional[List[PathologyStatement]] = Field(None, description="""A semicolon-delimited list of pathology statements, each describing a pathology, including any diagnoses, one or more specific qualities being measured and the anatomical location or tissue the pathology is measured in. If any of the pathology statements are negative, the negation should be included in each statement, e.g., \"no granulomas or viropathic changes\" should become \"no granulomas\" and \"no viropathic changes\".""", json_schema_extra = { "linkml_meta": {'alias': 'pathology_statements', 'domain_of': ['PathologyReport']} })
     is_benign: Optional[str] = Field(None, description="""Whether the overall pathology appears to be benign and not malignant. Other pathologies may be present, but if tissue is described as benign and/or if a carcinoma is explicitly excluded, this value should be true. A statement of \"no significant pathologic abnormality\" or the short form \"nspa\" would also have a value of true. It it otherwise 'unclear'.""", json_schema_extra = { "linkml_meta": {'alias': 'is_benign',
          'annotations': {'prompt.example': {'tag': 'prompt.example',
                                             'value': 'true, false, unclear'}},
diff --git a/src/ontogpt/templates/pathology.yaml b/src/ontogpt/templates/pathology.yaml
index 150a3cbe6..b1a343f8a 100644
--- a/src/ontogpt/templates/pathology.yaml
+++ b/src/ontogpt/templates/pathology.yaml
@@ -43,7 +43,7 @@ classes:
           pathology, including any diagnoses, one or more specific qualities
           being measured and the anatomical location or tissue the pathology is
           measured in. If any of the pathology statements are negative, the
-          negation should be included in each statment, e.g., "no granulomas or
+          negation should be included in each statement, e.g., "no granulomas or
           viropathic changes" should become "no granulomas" and "no viropathic
           changes".
         range: PathologyStatement
diff --git a/src/ontogpt/templates/recipe.yaml b/src/ontogpt/templates/recipe.yaml
index 04f2646c3..49b97c376 100644
--- a/src/ontogpt/templates/recipe.yaml
+++ b/src/ontogpt/templates/recipe.yaml
@@ -18,7 +18,7 @@ prefixes:
   qudt: http://qudt.org/schema/qudt/
   dbpediaont: http://dbpedia.org/ontology/
 
-# This template incorportates syntax from
+# This template incorporates syntax from
 # linkml-owl to define OWL interpretations
 # and enable advanced functionality.
 # https://linkml.io/linkml-owl/templates/
diff --git a/src/ontogpt/utils/pymupdf_helpers.py b/src/ontogpt/utils/pymupdf_helpers.py
index 9b909c2c7..bec15c643 100644
--- a/src/ontogpt/utils/pymupdf_helpers.py
+++ b/src/ontogpt/utils/pymupdf_helpers.py
@@ -106,7 +106,7 @@ def fonts(doc, granularity=False):
 def font_tags(font_counts, styles):
     """Return dictionary with font sizes as keys and tags as value.
 
-    :param font_counts: (font_size, count) for all fonts occuring in document
+    :param font_counts: (font_size, count) for all fonts occurring in document
     :type font_counts: list
     :param styles: all styles found in the document
     :type styles: dict

From 349afc03912a3d672b4dddcb2784c0c243dbaa0a Mon Sep 17 00:00:00 2001
From: Harry Caufield <j.harry.caufield@gmail.com>
Date: Wed, 18 Dec 2024 13:48:35 -0500
Subject: [PATCH 7/7] Add 'vrsatile' to ignored words

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 83d2a4303..5733d35c8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,4 +109,4 @@ build-backend = "poetry_dynamic_versioning.backend"
 skip = '.git,*.pdf,*.svg,poetry.lock,output,*.tsv,./tests/input,old'
 # some specific phrases, variables and mixed case (CamelCase etc)
 ignore-regex = '\b(Torsades de pointes|[A-Z][a-zA-Z]*|[a-z]+[A-Z][a-zA-Z]*|de pointes)\b|\bcommments:'
-ignore-words-list = 'langual,sting,infarction,holliday,cyclin,convertor,ser,collapsin,infarctions,euclidian,dependant'
+ignore-words-list = 'langual,sting,infarction,holliday,cyclin,convertor,ser,collapsin,infarctions,euclidian,dependant,vrsatile'