diff --git a/tests/taxonomy.py b/tests/taxonomy.py index f2b6cb47..227c2534 100644 --- a/tests/taxonomy.py +++ b/tests/taxonomy.py @@ -2,11 +2,12 @@ # Standard from pathlib import Path -from typing import List +from typing import Any, Dict, List import shutil # Third Party import git +import yaml class MockTaxonomy: @@ -25,12 +26,12 @@ def untracked_files(self) -> List[str]: """List untracked files in the repository""" return self._repo.untracked_files - def create_untracked(self, rel_path: str, contents: str) -> Path: + def create_untracked(self, rel_path: str, contents: Dict[str, Any]) -> Path: """Create a new untracked file in the repository. Args: rel_path (str): Relative path (from repository root) to the file. - contents (str): String to be written to the file. + contents (Dict[str, Any]): Object to be written to the file. Returns: file_path: The path to the created file. """ @@ -38,15 +39,16 @@ def create_untracked(self, rel_path: str, contents: str) -> Path: assert not taxonomy_path.is_absolute() file_path = self.root.joinpath(taxonomy_path) file_path.parent.mkdir(exist_ok=True, parents=True) - file_path.write_text(contents, encoding="utf-8") + with file_path.open(mode="w", encoding="utf-8") as fp: + yaml.dump(contents, fp) return file_path - def add_tracked(self, rel_path, contents: str) -> Path: - """Add a new tracked file to the repository (and commits it). + def add_tracked(self, rel_path, contents: Dict[str, Any]) -> Path: + """Add a new tracked file to the repository (and commit it). Args: rel_path (str): Relative path (from repository root) to the file. - contents (str): String to be written to the file. + contents (Dict[str, Any]): Object to be written to the file. Returns: file_path: The path to the added file. 
""" diff --git a/tests/test_generate_data.py b/tests/test_generate_data.py index 33d21e8f..98b87780 100644 --- a/tests/test_generate_data.py +++ b/tests/test_generate_data.py @@ -3,12 +3,462 @@ """ # Standard -from unittest import mock +from contextlib import contextmanager +from typing import Any, Dict, Union +from unittest.mock import MagicMock, patch +import glob +import json +import os +import shutil +import tempfile +import unittest + +# Third Party +from datasets import load_dataset +import pytest +import yaml # First Party -from instructlab.sdg.generate_data import _context_init +from instructlab.sdg.generate_data import _SYS_PROMPT, _context_init, generate_data +from instructlab.sdg.llmblock import LLMBlock from instructlab.sdg.pipeline import PipelineContext +TEST_TAXONOMY_BASE = "main" + +TEST_CUSTOM_YAML_RULES = b"""extends: relaxed +rules: + line-length: + max: 180 +""" + +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "testdata") + +NUM_INSTRUCTIONS_TO_GENERATE = 10 + + +def validate_legacy_dataset(dataset_file_name, expected_samples): + """Test dataset in the "legacy message sample" format. + + See LegacyMessageSample in instructlab/instructlab. + + system: str + user: str + assistant: str + + This is what is currently used by the legacy training methods such as Linux training and MacOS training. + """ + ds = load_dataset("json", data_files=dataset_file_name, split="train") + features = ["system", "user", "assistant"] + assert len(ds.features) == len(features) + for feature in features: + assert feature in ds.features + assert ds.features[feature].dtype == "string" + + for idx, sample in enumerate(expected_samples): + assert ds[idx]["system"] == _SYS_PROMPT + assert ds[idx]["user"] == sample["user"] + assert ds[idx]["assistant"] == sample["assistant"] + + +def validate_messages_dataset(dataset_file_name, expected_samples): + """Test dataset in the Hugging Face messages format + + See MessageSample in instructlab/instructlab. 
+ + messages: + content: str + # one of: "user", "assistant", or "system" + role: str + """ + ds = load_dataset("json", data_files=dataset_file_name, split="train") + assert len(ds.features) == 2 + assert len(ds.features["messages"]) == 1 + assert len(ds.features["messages"][0]) == 2 + assert ds.features["messages"][0]["content"].dtype == "string" + assert ds.features["messages"][0]["role"].dtype == "string" + assert ds.features["metadata"].dtype == "string" + + for idx, sample in enumerate(expected_samples): + assert len(ds[idx]["messages"]) == 2 + assert ds[idx]["messages"][0]["role"] == "user" + assert ds[idx]["messages"][0]["content"] == sample["user"] + assert ds[idx]["messages"][1]["role"] == "assistant" + assert ds[idx]["messages"][1]["content"] == sample["assistant"] + assert ds[idx]["metadata"] == json.dumps({"system": _SYS_PROMPT}) + + +def validate_skill_leaf_node_dataset(dataset_file_name): + ds = load_dataset("json", data_files=dataset_file_name, split="train") + assert len(ds.features) == 7 + features = [ + "task_description", + "seed_context", + "seed_question", + "seed_response", + "output", + "id", + ] + for feature in features: + assert feature in ds.features + assert ds.features[feature].dtype == "string" + assert "messages" in ds.features + assert len(ds.features["messages"]) == 1 + assert len(ds.features["messages"][0]) == 2 + assert ds.features["messages"][0]["content"].dtype == "string" + assert ds.features["messages"][0]["role"].dtype == "string" + + +def validate_phase_leaf_node_dataset(dataset_file_name): + ds = load_dataset("json", data_files=dataset_file_name, split="train") + assert len(ds.features) == 3 + features = ["metadata", "id"] + for feature in features: + assert feature in ds.features + assert ds.features[feature].dtype == "string" + assert "messages" in ds.features + assert len(ds.features["messages"]) == 1 + assert len(ds.features["messages"][0]) == 2 + assert ds.features["messages"][0]["content"].dtype == "string" + assert 
ds.features["messages"][0]["role"].dtype == "string" + + +def validate_recipe(recipe_file_name): + with open(recipe_file_name, encoding="utf-8") as fp: + yaml_contents = yaml.safe_load(fp) + assert len(yaml_contents["datasets"]) == 1 + assert yaml_contents["datasets"][0]["path"].endswith(".jsonl") + assert "sampling_size" in yaml_contents["datasets"][0] + assert yaml_contents["metadata"]["sys_prompt"] == _SYS_PROMPT + + +def validate_mixed_dataset(dataset_file_name): + ds = load_dataset("json", data_files=dataset_file_name, split="train") + assert "messages" in ds.features + assert len(ds.features["messages"]) == 1 + assert len(ds.features["messages"][0]) == 2 + assert ds.features["messages"][0]["content"].dtype == "string" + assert ds.features["messages"][0]["role"].dtype == "string" + + +def validate_lm_eval_task(lm_eval_task_file_name): + with open(lm_eval_task_file_name, encoding="utf-8") as fp: + yaml_contents = yaml.safe_load(fp) + assert "task" in yaml_contents + assert "dataset_kwargs" in yaml_contents + assert "doc_to_text" in yaml_contents + assert "doc_to_choice" in yaml_contents + assert "doc_to_target" in yaml_contents + + +def validate_mmlubench_dataset(dataset_file_name): + with open(dataset_file_name, encoding="utf-8") as fp: + # FIXME: fix the mmlubench pipeline in this test + assert fp.readlines() == [] + + +def generate_test_samples(yaml_contents): + """Convert questions and answers from the taxonomy format into the + user/assistant format used by the legacy training methods such as + Linux training and MacOS training. + + This mirrors what _gen_test_data() does. 
+ """ + test_samples = [] + is_knowledge = "document" in yaml_contents + for seed_example in yaml_contents["seed_examples"]: + if is_knowledge: + for qna in seed_example["questions_and_answers"]: + test_samples.append( + { + "user": qna["question"] + + "\n" + + seed_example["context"].strip(), + "assistant": qna["answer"].strip(), + } + ) + + else: + # FIXME: handle freeform skills - no context + test_samples.append( + { + "user": seed_example["question"] + "\n" + seed_example["context"], + "assistant": seed_example["answer"], + } + ) + return test_samples + + +def generate_train_samples(yaml_contents): + """Generate expected training samples in the user/assistant format + used by the legacy training methods such as Linux training and MacOS + training. + + Mirroring _noop_llmblock_generate() below, we generate 10 samples + per input, and then follow _gen_train_data()'s output format. + """ + + def add_question_mark(q): + return (q + "?") if not "?" in q else q + + train_samples = [] + is_knowledge = "document" in yaml_contents + for seed_example in yaml_contents["seed_examples"]: + for i in range(NUM_INSTRUCTIONS_TO_GENERATE): + if is_knowledge: + train_samples.append( + { + "user": seed_example["context"] + + f" (q{i}) " + + add_question_mark( + seed_example["questions_and_answers"][0]["question"].strip() + ), + "assistant": f"(a{i}) " + + seed_example["questions_and_answers"][0]["answer"].strip(), + } + ) + else: + # FIXME: handle freeform skills - no context + train_samples.append( + { + "user": seed_example["context"] + + f" (q{i}) " + + add_question_mark(seed_example["question"]), + "assistant": f"(a{i}) " + seed_example["answer"], + } + ) + return train_samples + + +def load_test_skills(skills_file_path) -> Union[Dict[str, Any], None]: + with open(skills_file_path, "r", encoding="utf-8") as skills_file: + return yaml.safe_load(skills_file) + + +def _noop_llmblock_generate(self, samples): + """Generate mock output based on input samples. 
+ + Simply return the seed question and response from the input sample, + joined using '?' and with an integer discriminator. + + _get_question_hack() and _get_response_hack() are the code that later + splits these using the '?' separator. + + Return 10 output samples per input sample, since the LLMBlock in the + simple pipeline is configured with 'n: scaled' and we pass + num_instructions_to_generate=10 to generate_data. + """ + + def strip_q(q): + return q.strip().rstrip("?") + + output = [] + for sample in samples: + for i in range(NUM_INSTRUCTIONS_TO_GENERATE): + if "domain" in sample: # knowledge + output.append( + sample["icl_document"] + + f" (q{i}) " + + strip_q(sample["icl_query_1"]) + + f" ? (a{i}) " + + sample["icl_response_1"] + ) + else: + output.append( + sample["seed_context"] + + f" (q{i}) " + + strip_q(sample["seed_question"]) + + f" ? (a{i}) " + + sample["seed_response"] + ) + return output + + +@patch.object(LLMBlock, "_generate", _noop_llmblock_generate) +class TestGenerateCompositionalData(unittest.TestCase): + @pytest.fixture(autouse=True) + def _init_taxonomy(self, taxonomy_dir): + self.test_taxonomy = taxonomy_dir + + def setUp(self): + self.tmp_path = tempfile.TemporaryDirectory().name + test_valid_compositional_skill_file = os.path.join( + TEST_DATA_DIR, "test_valid_compositional_skill.yaml" + ) + tracked_compositional_file = os.path.join( + "compositional_skills", "tracked", "qna.yaml" + ) + untracked_compositional_file = os.path.join( + "compositional_skills", "new", "qna.yaml" + ) + test_valid_compositional_skill = load_test_skills( + test_valid_compositional_skill_file + ) + self.test_taxonomy.add_tracked( + tracked_compositional_file, test_valid_compositional_skill + ) + self.test_taxonomy.create_untracked( + untracked_compositional_file, test_valid_compositional_skill + ) + self.expected_test_samples = generate_test_samples( + test_valid_compositional_skill + ) + self.expected_train_samples = generate_train_samples( + 
test_valid_compositional_skill + ) + + def test_generate(self): + with patch("logging.Logger.info") as mocked_logger: + generate_data( + mocked_logger, + model_family="merlinite", + model_name="models/merlinite-7b-lab-Q4_K_M.gguf", + num_instructions_to_generate=10, + taxonomy=self.test_taxonomy.root, + taxonomy_base=TEST_TAXONOMY_BASE, + output_dir=self.tmp_path, + yaml_rules=TEST_CUSTOM_YAML_RULES, + client=MagicMock(), + pipeline="simple", + ) + + for name in ["test_*.jsonl", "train_*.jsonl", "messages_*.jsonl"]: + matches = glob.glob(os.path.join(self.tmp_path, name)) + assert len(matches) == 1 + if name.startswith("test_"): + validate_legacy_dataset(matches[0], self.expected_test_samples) + elif name.startswith("train_"): + validate_legacy_dataset(matches[0], self.expected_train_samples) + elif name.startswith("messages_"): + validate_messages_dataset(matches[0], self.expected_train_samples) + + node_file = os.path.join("node_datasets_*", "compositional_skills_new.jsonl") + for name in [ + "skills_recipe_*.yaml", + "skills_train_msgs_*.jsonl", + node_file, + ]: + matches = glob.glob(os.path.join(self.tmp_path, name)) + assert len(matches) == 1 + if name.endswith("compositional_skills_new.jsonl"): + validate_skill_leaf_node_dataset(matches[0]) + elif name.startswith("skills_recipe_"): + validate_recipe(matches[0]) + elif name.startswith("skills_train_msgs_"): + validate_mixed_dataset(matches[0]) + + def teardown(self) -> None: + """Recursively remove the temporary repository and all of its + subdirectories and files. 
+ """ + shutil.rmtree(self.tmp_path) + return + + def __enter__(self): + return self + + def __exit__(self, *args): + self.teardown() + + +@patch.object(LLMBlock, "_generate", _noop_llmblock_generate) +class TestGenerateKnowledgeData(unittest.TestCase): + @pytest.fixture(autouse=True) + def _init_taxonomy(self, taxonomy_dir): + self.test_taxonomy = taxonomy_dir + + def setUp(self): + self.tmp_path = tempfile.TemporaryDirectory().name + test_valid_knowledge_skill_file = os.path.join( + TEST_DATA_DIR, "test_valid_knowledge_skill.yaml" + ) + tracked_knowledge_file = os.path.join("knowledge ", "tracked", "qna.yaml") + untracked_knowledge_file = os.path.join("knowledge", "new", "qna.yaml") + test_valid_knowledge_skill = load_test_skills(test_valid_knowledge_skill_file) + self.test_taxonomy.add_tracked( + tracked_knowledge_file, test_valid_knowledge_skill + ) + self.test_taxonomy.create_untracked( + untracked_knowledge_file, test_valid_knowledge_skill + ) + self.expected_test_samples = generate_test_samples(test_valid_knowledge_skill) + self.expected_train_samples = generate_train_samples(test_valid_knowledge_skill) + + def test_generate(self): + with patch("logging.Logger.info") as mocked_logger: + generate_data( + mocked_logger, + model_family="merlinite", + model_name="models/merlinite-7b-lab-Q4_K_M.gguf", + num_instructions_to_generate=10, + taxonomy=self.test_taxonomy.root, + taxonomy_base=TEST_TAXONOMY_BASE, + output_dir=self.tmp_path, + yaml_rules=TEST_CUSTOM_YAML_RULES, + chunk_word_count=1000, + server_ctx_size=4096, + client=MagicMock(), + pipeline="simple", + ) + + for name in ["test_*.jsonl", "train_*.jsonl", "messages_*.jsonl"]: + matches = glob.glob(os.path.join(self.tmp_path, name)) + assert len(matches) == 1 + if name.startswith("test_"): + validate_legacy_dataset(matches[0], self.expected_test_samples) + elif name.startswith("train_"): + validate_legacy_dataset(matches[0], self.expected_train_samples) + elif name.startswith("messages_"): + 
validate_messages_dataset(matches[0], self.expected_train_samples) + + node_p07_file = os.path.join("node_datasets_*", "knowledge_new_p07.jsonl") + node_p10_file = os.path.join("node_datasets_*", "knowledge_new_p10.jsonl") + for name in [ + "skills_recipe_*.yaml", + "skills_train_msgs_*.jsonl", + "knowledge_recipe_*.yaml", + "knowledge_train_msgs_*.jsonl", + node_p07_file, + node_p10_file, + ]: + matches = glob.glob(os.path.join(self.tmp_path, name)) + assert len(matches) == 1 + if name.endswith("knowledge_new_p07.jsonl") or name.endswith( + "knowledge_new_p10.jsonl" + ): + validate_phase_leaf_node_dataset(matches[0]) + elif name.startswith("skills_recipe_") or name.startswith( + "knowledge_recipe_" + ): + validate_recipe(matches[0]) + elif name.startswith("skills_train_msgs_") or name.startswith( + "knowledge_train_msgs_" + ): + validate_mixed_dataset(matches[0]) + + for name in [ + "knowledge_new_task.yaml", + "mmlubench_knowledge_new.jsonl", + ]: + matches = glob.glob(os.path.join(self.tmp_path, "node_datasets_*", name)) + assert len(matches) == 1 + if name == "knowledge_new_task.yaml": + validate_lm_eval_task(matches[0]) + elif name == "mmlubench_knowledge_new.jsonl": + validate_mmlubench_dataset(matches[0]) + + def teardown(self) -> None: + """Recursively remove the temporary repository and all of its + subdirectories and files. 
+ """ + shutil.rmtree(self.tmp_path) + return + + def __enter__(self): + return self + + def __exit__(self, *args): + self.teardown() + def test_context_init_batch_size_optional(): """Test that the _context_init function can handle a missing batch size by diff --git a/tests/test_taxonomy.py b/tests/test_taxonomy.py index e8aa4bda..8c148113 100644 --- a/tests/test_taxonomy.py +++ b/tests/test_taxonomy.py @@ -1,36 +1,17 @@ # SPDX-License-Identifier: Apache-2.0 # Standard +from typing import Any, Dict, Union import os import pathlib # Third Party import pytest +import yaml # First Party from instructlab.sdg.utils import taxonomy -TEST_VALID_COMPOSITIONAL_SKILL_YAML = """created_by: rafael-vasquez -version: 1 -seed_examples: -- answer: "Sure thing!" - context: "This is a valid YAML." - question: "Can you help me debug this failing unit test?" -- answer: "answer2" - context: "context2" - question: "question2" -- answer: "answer3" - context: "context3" - question: "question3" -- answer: "answer4" - context: "context4" - question: "question4" -- answer: "answer5" - context: "context5" - question: "question5" -task_description: 'This is a task' -""" - TEST_SEED_EXAMPLE = "Can you help me debug this failing unit test?" 
TEST_CUSTOM_YAML_RULES = b"""extends: relaxed @@ -40,6 +21,13 @@ max: 180 """ +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "testdata") + + +def load_test_skills(skills_file_path) -> Union[Dict[str, Any], None]: + with open(skills_file_path, "r", encoding="utf-8") as skills_file: + return yaml.safe_load(skills_file) + class TestTaxonomy: """Test taxonomy in instructlab.sdg.utils.taxonomy.""" @@ -80,17 +68,18 @@ def test_read_taxonomy_leaf_nodes( ): tracked_file = "compositional_skills/tracked/qna.yaml" untracked_file = "compositional_skills/new/qna.yaml" + test_compositional_skill_file = os.path.join( + TEST_DATA_DIR, "test_valid_compositional_skill.yaml" + ) + test_compositional_skill = load_test_skills(test_compositional_skill_file) if create_tracked_file: - self.taxonomy.add_tracked(tracked_file, TEST_VALID_COMPOSITIONAL_SKILL_YAML) + self.taxonomy.add_tracked(tracked_file, test_compositional_skill) if create_untracked_file: - self.taxonomy.create_untracked( - untracked_file, TEST_VALID_COMPOSITIONAL_SKILL_YAML - ) + self.taxonomy.create_untracked(untracked_file, test_compositional_skill) leaf_nodes = taxonomy.read_taxonomy_leaf_nodes( self.taxonomy.root, taxonomy_base, TEST_CUSTOM_YAML_RULES ) - assert len(leaf_nodes) == len(check_leaf_node_keys) for leaf_node_key in check_leaf_node_keys: diff --git a/tests/testdata/test_valid_compositional_skill.yaml b/tests/testdata/test_valid_compositional_skill.yaml new file mode 100644 index 00000000..df4ad2c8 --- /dev/null +++ b/tests/testdata/test_valid_compositional_skill.yaml @@ -0,0 +1,19 @@ +created_by: rafael-vasquez +version: 1 +seed_examples: +- answer: "Sure thing!" + context: "This is a valid YAML." + question: "Can you help me debug this failing unit test?" 
+- answer: "answer2" + context: "context2" + question: "question2" +- answer: "answer3" + context: "context3" + question: "question3" +- answer: "answer4" + context: "context4" + question: "question4" +- answer: "answer5" + context: "context5" + question: "question5" +task_description: 'This is a task' \ No newline at end of file diff --git a/tests/testdata/test_valid_knowledge_skill.yaml b/tests/testdata/test_valid_knowledge_skill.yaml new file mode 100644 index 00000000..705acb41 --- /dev/null +++ b/tests/testdata/test_valid_knowledge_skill.yaml @@ -0,0 +1,176 @@ +created_by: lukeinglis +domain: anatomy_tonsil +version: 3 +seed_examples: + - context: | + ## Structure + Humans are born with four types of tonsils: the pharyngeal tonsil, two + tubal tonsils, two palatine tonsils, and the lingual tonsils.[1] + +
Type |
+ + | + | + | Location |
+
---|---|---|---|---|
Pharyngeal tonsil (also + termed "adenoid") |
+ Ciliated + pseudostratified columnar (respiratory epithelium) |
+ Incompletely encapsulated |
+ Small folds—sometimes described as crypts1 |
+ Roof of pharynx |
+
+ | Ciliated pseudostratified columnar (respiratory epithelium) |
+ Not encapsulated |
+ No crypts |
+ Roof of pharynx |
+
+ | Stratified squamous epithelium |
+ Fully encapsulated |
+ Multiple deep crypts |
+ Each side of the throat at the back of the mouth |
+