From c2e3d377ac08d931e146def1fb4fbe691aac5d03 Mon Sep 17 00:00:00 2001
From: Andreas Zeller
Date: Sun, 5 Jan 2025 17:08:50 +0100
Subject: [PATCH] Everything is running :-)

---
 notebooks/Alhazen.ipynb | 123 ++++++++++++++++++++++++----------------
 1 file changed, 75 insertions(+), 48 deletions(-)

diff --git a/notebooks/Alhazen.ipynb b/notebooks/Alhazen.ipynb
index a7a75fd8..3d0344ae 100644
--- a/notebooks/Alhazen.ipynb
+++ b/notebooks/Alhazen.ipynb
@@ -214,7 +214,7 @@
    "outputs": [],
    "source": [
     "# Load initial input files\n",
-    "sample_list = ['sqrt(-16)', 'sqrt(4)']"
+    "initial_sample_list = ['sqrt(-16)', 'sqrt(4)']"
    ]
   },
   {
@@ -604,7 +604,11 @@
     "    @abstractmethod\n",
     "    def get_feature_value(self, derivation_tree) -> float:\n",
     "        '''Returns the feature value for a given derivation tree of an input.'''\n",
-    "        pass"
+    "        pass\n",
+    "\n",
+    "    def replace(self, new_key: str) -> 'Feature':\n",
+    "        '''Returns a new feature with the same name but a different key.'''\n",
+    "        return self.__class__(self.name, self.rule, new_key)"
    ]
   },
   {
@@ -1113,7 +1117,6 @@
    "source": [
     "from sklearn.tree import DecisionTreeClassifier\n",
     "from sklearn.feature_extraction import DictVectorizer\n",
-    "from sklearn import tree\n",
     "\n",
     "import graphviz"
    ]
   },
   {
@@ -1220,11 +1223,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dot_data = tree.export_graphviz(clf, out_file=None,\n",
-    "                                feature_names=vec.get_feature_names_out(),\n",
-    "                                class_names=[\"BUG\", \"NO BUG\"],\n",
-    "                                filled=True, rounded=True)\n",
-    "graph = graphviz.Source(dot_data)"
+    "import graphviz\n",
+    "import sklearn\n",
+    "\n",
+    "def show_decision_tree(clf, feature_names):\n",
+    "    dot_data = sklearn.tree.export_graphviz(clf, out_file=None, \n",
+    "                                            feature_names=feature_names,\n",
+    "                                            class_names=[\"BUG\", \"NO_BUG\"], \n",
+    "                                            filled=True, rounded=True) \n",
+    "    return graphviz.Source(dot_data)"
    ]
   },
   {
@@ -1233,7 +1240,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "display(graph)"
+    "show_decision_tree(clf, vec.get_feature_names_out())"
    ]
   },
   {
@@ -1309,6 +1316,7 @@
    "source": [
     "def train_tree(data):\n",
     "    sample_bug_count = len(data[(data[\"oracle\"].astype(str) == \"BUG\")])\n",
+    "    assert sample_bug_count > 0, \"No bug samples found\"\n",
     "    sample_count = len(data)\n",
     "\n",
     "    clf = DecisionTreeClassifier(min_samples_leaf=1,\n",
@@ -1392,7 +1400,7 @@
     "clf = clf.fit(X_data, oracle)\n",
     "\n",
     "import graphviz\n",
-    "dot_data = tree.export_graphviz(clf, out_file=None, \n",
+    "dot_data = sklearn.tree.export_graphviz(clf, out_file=None, \n",
     "                                feature_names=feature_names,\n",
     "                                class_names=[\"BUG\", \"NO BUG\"], \n",
     "                                filled=True, rounded=True) \n",
     "graph = graphviz.Source(dot_data)"
@@ -1697,13 +1705,19 @@
     "            mini = row['min']\n",
     "            maxi = row['max']\n",
     "            if (not np.isinf(mini)) or (not np.isinf(maxi)):\n",
-    "                requirements.append(Requirement(feature, mini, maxi))\n",
+    "                requirements.append(TreeRequirement(feature, mini, maxi))\n",
     "        paths.append(TreePath(None, is_bug, requirements))\n",
     "\n",
-    "    return paths\n",
-    "\n",
-    "\n",
-    "class Requirement:\n",
+    "    return paths\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class TreeRequirement:\n",
     "\n",
     "    def __init__(self, feature: Feature, mini, maxi):\n",
     "        self.__feature: Feature = feature\n",
@@ -1788,10 +1802,10 @@
     "\n",
     "class TreePath:\n",
     "\n",
-    "    def __init__(self, samplefile: Optional[Path], is_bug: bool, requirements: List[Requirement]):\n",
+    "    def __init__(self, samplefile: Optional[Path], is_bug: bool, requirements: List[TreeRequirement]):\n",
     "        self.__sample = samplefile\n",
     "        self.__is_bug = is_bug\n",
-    "        self.__requirements: List[Requirement] = requirements\n",
+    "        self.__requirements: List[TreeRequirement] = requirements\n",
     "\n",
     "    def is_bug(self) -> bool:\n",
     "        return self.__is_bug\n",
@@ -2152,7 +2166,7 @@
     "from typing import List\n",
     "from fuzzingbook.GrammarFuzzer import DerivationTree\n",
     "\n",
-    "class Requirement:\n",
+    "class SpecRequirement:\n",
     "    '''\n",
     "    This class represents a requirement for a new input sample that should be generated.\n",
     "    This class contains the feature that should be fullfiled (Feature), a quantifier\n",
@@ -2171,9 +2185,15 @@
     "        self.value = value\n",
     "\n",
     "    def __str__(self):\n",
-    "        return f\"Requirement({self.feature.name} {self.quant} {self.value})\"\n",
-    "\n",
-    "\n",
+    "        return f\"Requirement({self.feature.name} {self.quant} {self.value})\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "class InputSpecification:\n",
     "    '''\n",
     "    This class represents a complet input specification of a new input. A input specification\n",
@@ -2181,8 +2201,8 @@
     "    requirements : Is a list of all requirements that must be used.\n",
     "    '''\n",
     "\n",
-    "    def __init__(self, requirements: List[Requirement]):\n",
-    "        self.requirements: List[Reqirement] = requirements\n",
+    "    def __init__(self, requirements: List[SpecRequirement]):\n",
+    "        self.requirements: List[SpecRequirement] = requirements\n",
     "\n",
     "    def __str__(self):\n",
     "        # Handle first element\n",
@@ -2227,7 +2247,7 @@
     "        if f.name == feature_name:\n",
     "            feature_class = f\n",
     "\n",
-    "        requirement_list.append(Requirement(feature_class, quant, value))\n",
+    "        requirement_list.append(SpecRequirement(feature_class, quant, value))\n",
     "\n",
     "    return InputSpecification(requirement_list)\n",
     "\n",
@@ -2489,6 +2509,15 @@
     "    return final_samples"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "generate_samples = generate_samples_advanced"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -2524,16 +2553,16 @@
     "exsqrt = ExistenceFeature('exists(@0)', '', 'sqrt')\n",
     "exdigit = ExistenceFeature('exists()', '', '')\n",
     "\n",
-    "reqDigit = Requirement(exdigit, '>', '0.5')\n",
-    "fbdDigit = Requirement(exdigit, '<=', '0.5')\n",
+    "reqDigit = SpecRequirement(exdigit, '>', '0.5')\n",
+    "fbdDigit = SpecRequirement(exdigit, '<=', '0.5')\n",
     "\n",
-    "req0 = Requirement(exsqrt, '>', '-6.0')\n",
+    "req0 = SpecRequirement(exsqrt, '>', '-6.0')\n",
     "testspec0 = InputSpecification([req0, reqDigit])\n",
-    "req1 = Requirement(exsqrt, '<=', '-6.0')\n",
+    "req1 = SpecRequirement(exsqrt, '<=', '-6.0')\n",
     "testspec1 = InputSpecification([req1, fbdDigit])\n",
     "\n",
     "numterm = NumericInterpretation('num()', '')\n",
-    "req2 = Requirement(numterm, '<', '-31.0')\n",
+    "req2 = SpecRequirement(numterm, '<', '-31.0')\n",
     "testspec2 = InputSpecification([req2, req0, reqDigit])\n",
     "\n",
     "print('--generating samples--')\n",
@@ -2663,7 +2692,8 @@
     "\n",
     "# let's initialize Alhazen\n",
     "# let's use the previously used sample_list (['sqrt(-16)', 'sqrt(4)'])\n",
-    "alhazen = Alhazen(sample_list, CALC_GRAMMAR, MAX_ITERATION, GENERATOR_TIMEOUT)\n",
+    "alhazen = Alhazen(initial_sample_list,\n",
+    "                  CALC_GRAMMAR, MAX_ITERATION, GENERATOR_TIMEOUT)\n",
     "\n",
     "# and run it\n",
     "# Alhazen returns a list of all the iteratively learned decision trees\n",
     "trees = alhazen.run()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n",
-    "\n",
-    "\n",
-    "Let's display the final decision tree learned by Alhazen. You can use the function `show_tree(decison_tree, features)` to display the final tree."
+    "Let's display the final decision tree learned by Alhazen. You can use the function `show_tree(decision_tree, features)` to display the final tree."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "def show_tree(clf, feature_names):\n",
-    "    dot_data = tree.export_graphviz(clf, out_file=None, \n",
-    "                         feature_names= feature_names,\n",
-    "                         class_names=[\"BUG\", \"NO_BUG\"], \n",
-    "                         filled=True, rounded=True) \n",
-    "    return graphviz.Source(dot_data)"
+    "final_tree = trees[MAX_ITERATION-1]\n",
+    "final_tree"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "all_features = extract_existence(CALC_GRAMMAR) + extract_numeric(CALC_GRAMMAR)\n",
-    "# show_tree(trees[MAX_ITERATION-1], all_features)"
+    "all_feature_names = [f.name for f in all_features]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_decision_tree(final_tree, all_feature_names)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Info:** The decision tree may contain unnecessary long paths, where the bug-class does not change. You can use the function 'remove_unequal_decisions(decision_tree)' to remove those nodes."
+    "**Info:** The decision tree may contain unnecessary long paths, where the bug-class does not change. You can use the function `remove_unequal_decisions(decision_tree)` to remove those nodes."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "show_tree(remove_unequal_decisions(trees[MAX_ITERATION-1]), all_features)"
+    "show_decision_tree(remove_unequal_decisions(final_tree), all_feature_names)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},