Skip to content
Snippets Groups Projects
UCM_results_analysis_Python.ipynb 119 KiB
Newer Older
Dong Cao's avatar
Dong Cao committed

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn\n",
    "from sklearn.metrics import r2_score\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.pyplot import figure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>dataset</th>\n",
       "      <th>attributes</th>\n",
       "      <th>instances</th>\n",
       "      <th>classes</th>\n",
       "      <th>mv_acc</th>\n",
       "      <th>ucm_acc</th>\n",
       "      <th>wmv_acc</th>\n",
       "      <th>mv_f1</th>\n",
       "      <th>ucm_f1</th>\n",
       "      <th>wmv_f1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>abalone</td>\n",
       "      <td>8</td>\n",
       "      <td>4177</td>\n",
       "      <td>29</td>\n",
       "      <td>26.56</td>\n",
       "      <td>26.41</td>\n",
       "      <td>26.03</td>\n",
       "      <td>23.66</td>\n",
       "      <td>23.41</td>\n",
       "      <td>23.48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>anneal</td>\n",
       "      <td>38</td>\n",
       "      <td>798</td>\n",
       "      <td>6</td>\n",
       "      <td>92.60</td>\n",
       "      <td>91.35</td>\n",
       "      <td>92.22</td>\n",
       "      <td>92.64</td>\n",
       "      <td>91.11</td>\n",
       "      <td>92.27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>arrhythmia</td>\n",
       "      <td>279</td>\n",
       "      <td>452</td>\n",
       "      <td>16</td>\n",
       "      <td>66.74</td>\n",
       "      <td>65.41</td>\n",
       "      <td>66.08</td>\n",
       "      <td>58.04</td>\n",
       "      <td>55.62</td>\n",
       "      <td>57.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>audiology</td>\n",
       "      <td>69</td>\n",
       "      <td>226</td>\n",
       "      <td>24</td>\n",
       "      <td>82.23</td>\n",
       "      <td>78.72</td>\n",
       "      <td>82.25</td>\n",
       "      <td>78.47</td>\n",
       "      <td>73.88</td>\n",
       "      <td>78.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>breast-cancer</td>\n",
       "      <td>9</td>\n",
       "      <td>286</td>\n",
       "      <td>2</td>\n",
       "      <td>71.55</td>\n",
       "      <td>70.48</td>\n",
       "      <td>70.18</td>\n",
       "      <td>67.95</td>\n",
       "      <td>67.31</td>\n",
       "      <td>67.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>breast-cancer-w</td>\n",
       "      <td>9</td>\n",
       "      <td>699</td>\n",
       "      <td>2</td>\n",
       "      <td>96.85</td>\n",
       "      <td>96.85</td>\n",
       "      <td>97.00</td>\n",
       "      <td>96.87</td>\n",
       "      <td>96.87</td>\n",
       "      <td>97.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>car</td>\n",
       "      <td>6</td>\n",
       "      <td>1728</td>\n",
       "      <td>4</td>\n",
       "      <td>88.72</td>\n",
       "      <td>89.35</td>\n",
       "      <td>87.50</td>\n",
       "      <td>89.18</td>\n",
       "      <td>89.59</td>\n",
       "      <td>87.67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>crx</td>\n",
       "      <td>15</td>\n",
       "      <td>690</td>\n",
       "      <td>2</td>\n",
       "      <td>84.18</td>\n",
       "      <td>84.18</td>\n",
       "      <td>83.46</td>\n",
       "      <td>82.74</td>\n",
       "      <td>83.05</td>\n",
       "      <td>82.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>dermatology</td>\n",
       "      <td>34</td>\n",
       "      <td>366</td>\n",
       "      <td>6</td>\n",
       "      <td>97.24</td>\n",
       "      <td>96.97</td>\n",
       "      <td>97.24</td>\n",
       "      <td>97.23</td>\n",
       "      <td>96.94</td>\n",
       "      <td>97.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>ecoli</td>\n",
       "      <td>7</td>\n",
       "      <td>336</td>\n",
       "      <td>4</td>\n",
       "      <td>86.54</td>\n",
       "      <td>86.25</td>\n",
       "      <td>86.55</td>\n",
       "      <td>85.62</td>\n",
       "      <td>85.28</td>\n",
       "      <td>85.66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>glass</td>\n",
       "      <td>10</td>\n",
       "      <td>214</td>\n",
       "      <td>7</td>\n",
       "      <td>66.28</td>\n",
       "      <td>65.76</td>\n",
       "      <td>70.48</td>\n",
       "      <td>61.50</td>\n",
       "      <td>60.56</td>\n",
       "      <td>66.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>ionosphere</td>\n",
       "      <td>34</td>\n",
       "      <td>351</td>\n",
       "      <td>2</td>\n",
       "      <td>92.86</td>\n",
       "      <td>91.14</td>\n",
       "      <td>90.29</td>\n",
       "      <td>92.66</td>\n",
       "      <td>90.80</td>\n",
       "      <td>89.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>iris</td>\n",
       "      <td>5</td>\n",
       "      <td>150</td>\n",
       "      <td>3</td>\n",
       "      <td>95.33</td>\n",
       "      <td>95.33</td>\n",
       "      <td>95.33</td>\n",
       "      <td>95.29</td>\n",
       "      <td>95.29</td>\n",
       "      <td>95.29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>kr-vs-kp</td>\n",
       "      <td>36</td>\n",
       "      <td>3196</td>\n",
       "      <td>2</td>\n",
       "      <td>96.87</td>\n",
       "      <td>96.40</td>\n",
       "      <td>95.93</td>\n",
       "      <td>96.86</td>\n",
       "      <td>96.37</td>\n",
       "      <td>95.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "      <td>labor-neg</td>\n",
       "      <td>16</td>\n",
       "      <td>57</td>\n",
       "      <td>2</td>\n",
       "      <td>94.33</td>\n",
       "      <td>94.33</td>\n",
       "      <td>96.33</td>\n",
       "      <td>94.19</td>\n",
       "      <td>94.19</td>\n",
       "      <td>96.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>letter</td>\n",
       "      <td>16</td>\n",
       "      <td>20000</td>\n",
       "      <td>26</td>\n",
       "      <td>92.36</td>\n",
       "      <td>91.39</td>\n",
       "      <td>94.76</td>\n",
       "      <td>92.41</td>\n",
       "      <td>91.43</td>\n",
       "      <td>94.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>liver disorders</td>\n",
       "      <td>6</td>\n",
       "      <td>345</td>\n",
       "      <td>2</td>\n",
       "      <td>72.94</td>\n",
       "      <td>72.06</td>\n",
       "      <td>73.24</td>\n",
       "      <td>72.81</td>\n",
       "      <td>71.23</td>\n",
       "      <td>72.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>lymphography</td>\n",
       "      <td>20</td>\n",
       "      <td>148</td>\n",
       "      <td>4</td>\n",
       "      <td>80.95</td>\n",
       "      <td>80.29</td>\n",
       "      <td>81.67</td>\n",
       "      <td>79.71</td>\n",
       "      <td>78.79</td>\n",
       "      <td>80.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "      <td>nursery</td>\n",
       "      <td>8</td>\n",
       "      <td>12960</td>\n",
       "      <td>5</td>\n",
       "      <td>90.57</td>\n",
       "      <td>90.90</td>\n",
       "      <td>89.29</td>\n",
       "      <td>89.76</td>\n",
       "      <td>90.10</td>\n",
       "      <td>88.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>19</td>\n",
       "      <td>20</td>\n",
       "      <td>page-blocks</td>\n",
       "      <td>10</td>\n",
       "      <td>5473</td>\n",
       "      <td>5</td>\n",
       "      <td>96.05</td>\n",
       "      <td>96.18</td>\n",
       "      <td>96.03</td>\n",
       "      <td>95.57</td>\n",
       "      <td>95.76</td>\n",
       "      <td>95.71</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>segment</td>\n",
       "      <td>21</td>\n",
       "      <td>2310</td>\n",
       "      <td>7</td>\n",
       "      <td>95.93</td>\n",
       "      <td>96.32</td>\n",
       "      <td>96.19</td>\n",
       "      <td>95.87</td>\n",
       "      <td>96.28</td>\n",
       "      <td>96.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>21</td>\n",
       "      <td>22</td>\n",
       "      <td>sonar</td>\n",
       "      <td>208</td>\n",
       "      <td>60</td>\n",
       "      <td>2</td>\n",
       "      <td>66.71</td>\n",
       "      <td>66.74</td>\n",
       "      <td>67.64</td>\n",
       "      <td>65.39</td>\n",
       "      <td>65.87</td>\n",
       "      <td>66.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>22</td>\n",
       "      <td>23</td>\n",
       "      <td>spambase</td>\n",
       "      <td>57</td>\n",
       "      <td>4601</td>\n",
       "      <td>2</td>\n",
       "      <td>93.91</td>\n",
       "      <td>93.98</td>\n",
       "      <td>93.83</td>\n",
       "      <td>93.88</td>\n",
       "      <td>93.96</td>\n",
       "      <td>93.81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    ID          dataset  attributes  instances  classes  mv_acc  ucm_acc  \\\n",
       "0    1          abalone           8       4177       29   26.56    26.41   \n",
       "1    2           anneal          38        798        6   92.60    91.35   \n",
       "2    3       arrhythmia         279        452       16   66.74    65.41   \n",
       "3    4        audiology          69        226       24   82.23    78.72   \n",
       "4    5    breast-cancer           9        286        2   71.55    70.48   \n",
       "5    6  breast-cancer-w           9        699        2   96.85    96.85   \n",
       "6    7              car           6       1728        4   88.72    89.35   \n",
       "7    8              crx          15        690        2   84.18    84.18   \n",
       "8    9      dermatology          34        366        6   97.24    96.97   \n",
       "9   10            ecoli           7        336        4   86.54    86.25   \n",
       "10  11            glass          10        214        7   66.28    65.76   \n",
       "11  12       ionosphere          34        351        2   92.86    91.14   \n",
       "12  13             iris           5        150        3   95.33    95.33   \n",
       "13  14         kr-vs-kp          36       3196        2   96.87    96.40   \n",
       "14  15        labor-neg          16         57        2   94.33    94.33   \n",
       "15  16           letter          16      20000       26   92.36    91.39   \n",
       "16  17  liver disorders           6        345        2   72.94    72.06   \n",
       "17  18     lymphography          20        148        4   80.95    80.29   \n",
       "18  19          nursery           8      12960        5   90.57    90.90   \n",
       "19  20      page-blocks          10       5473        5   96.05    96.18   \n",
       "20  21          segment          21       2310        7   95.93    96.32   \n",
       "21  22            sonar         208         60        2   66.71    66.74   \n",
       "22  23         spambase          57       4601        2   93.91    93.98   \n",
       "\n",
       "    wmv_acc  mv_f1  ucm_f1  wmv_f1  \n",
       "0     26.03  23.66   23.41   23.48  \n",
       "1     92.22  92.64   91.11   92.27  \n",
       "2     66.08  58.04   55.62   57.03  \n",
       "3     82.25  78.47   73.88   78.70  \n",
       "4     70.18  67.95   67.31   67.96  \n",
       "5     97.00  96.87   96.87   97.01  \n",
       "6     87.50  89.18   89.59   87.67  \n",
       "7     83.46  82.74   83.05   82.76  \n",
       "8     97.24  97.23   96.94   97.21  \n",
       "9     86.55  85.62   85.28   85.66  \n",
       "10    70.48  61.50   60.56   66.45  \n",
       "11    90.29  92.66   90.80   89.88  \n",
       "12    95.33  95.29   95.29   95.29  \n",
       "13    95.93  96.86   96.37   95.90  \n",
       "14    96.33  94.19   94.19   96.19  \n",
       "15    94.76  92.41   91.43   94.78  \n",
       "16    73.24  72.81   71.23   72.69  \n",
       "17    81.67  79.71   78.79   80.60  \n",
       "18    89.29  89.76   90.10   88.64  \n",
       "19    96.03  95.57   95.76   95.71  \n",
       "20    96.19  95.87   96.28   96.14  \n",
       "21    67.64  65.39   65.87   66.96  \n",
       "22    93.83  93.88   93.96   93.81  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the per-dataset results table. Per the output shown for this cell, each row is one\n",
    "# dataset with its size (attributes, instances, classes) and accuracy / F1 columns prefixed\n",
    "# mv_, ucm_ and wmv_.\n",
    "# NOTE(review): the 'sonar' row shows attributes=208 and instances=60; these look swapped\n",
    "# relative to how that dataset is usually described -- verify performance.csv.\n",
    "performance_df = pd.read_csv('performance.csv')\n",
    "performance_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 880x200 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Per-dataset accuracy gap, UCM minus Majority Voting; bars below zero mean MV scored higher.\n",
    "datasets = performance_df['dataset']\n",
    "acc_diff = performance_df['ucm_acc'].sub(performance_df['mv_acc'])\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(11, 2.5), dpi=80)\n",
    "ax.set_xticks(performance_df['ID'])\n",
    "ax.set_xlabel('Dataset ID', fontsize=12)\n",
    "ax.set_ylabel('Accuracy Difference', fontsize=12)\n",
    "ax.set_ylim(-5, 1)\n",
    "\n",
    "ax.bar(performance_df['ID'], acc_diff, color='#8c9bb5', width=0.95)\n",
    "fig.savefig('accuracy-graph.png', bbox_inches=\"tight\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: The graph above shows the accuracy difference between UCM and Majority Voting. As most of the bars are below 0, we can say that Majority Voting is the winner."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<BarContainer object of 23 artists>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 400x640 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Same UCM - MV accuracy differences as the bar chart above, drawn as horizontal bars\n",
    "# with the dataset ID on the y-axis.\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(5, 8), dpi=80)\n",
    "ax.set_xlabel('Accuracy Difference', fontsize=12)\n",
    "ax.set_yticks(performance_df['ID'])\n",
    "ax.set_ylabel('Dataset ID', fontsize=12)\n",
    "ax.barh(performance_df['ID'], acc_diff, color='#8c9bb5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 880x200 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Per-dataset F1 gap, UCM minus Majority Voting, on the same axes layout as the accuracy chart.\n",
    "f1_diff = performance_df['ucm_f1'].sub(performance_df['mv_f1'])\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(11, 2.5), dpi=80)\n",
    "ax.set_xticks(performance_df['ID'])\n",
    "ax.set_xlabel('Dataset ID', fontsize=12)\n",
    "ax.set_ylabel('F1 Difference', fontsize=12)\n",
    "ax.set_ylim(-5, 1)\n",
    "\n",
    "ax.bar(performance_df['ID'], f1_diff, color='#8c9bb5', width=0.95)\n",
    "fig.savefig('f1-graph.png', bbox_inches=\"tight\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: The graph above shows the $F_1$ difference between UCM and Majority Voting. Regarding this metric, Majority Voting is still the winner on most of the datasets and a very similar pattern to the accuracy difference is observed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "R^2 score: 0.005220518803246588\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 640x320 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Compare two gaps over Majority Voting per dataset: Weighted Voting's (x) and UCM's (y).\n",
    "ucm_mv_acc_diff = performance_df['ucm_acc'].sub(performance_df['mv_acc'])\n",
    "wmv_mv_acc_diff = performance_df['wmv_acc'].sub(performance_df['mv_acc'])\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 4), dpi=80)\n",
    "ax.set_xlabel('WV Ac - MV Ac', fontsize=12)\n",
    "ax.set_ylabel('UCM Ac - MV Ac', fontsize=12)\n",
    "ax.scatter(wmv_mv_acc_diff, ucm_mv_acc_diff, color='#8c9bb5')\n",
    "\n",
    "# Degree-1 least-squares fit; the line is drawn between the smallest and largest x only.\n",
    "slope, intercept = np.polyfit(wmv_mv_acc_diff, ucm_mv_acc_diff, 1)\n",
    "x_min, x_max = wmv_mv_acc_diff.min(), wmv_mv_acc_diff.max()\n",
    "y_min = slope*x_min + intercept\n",
    "y_max = slope*x_max + intercept\n",
    "ax.plot([x_min, x_max], [y_min, y_max], color='red', linestyle='--')\n",
    "fig.savefig('wv-graph.png', bbox_inches=\"tight\")\n",
    "\n",
    "# Score the fitted line against the observed UCM gaps.\n",
    "y_true = ucm_mv_acc_diff\n",
    "y_fit = slope*wmv_mv_acc_diff + intercept\n",
    "print(f\"R^2 score: {r2_score(y_true, y_fit)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: The line of best fit is nearly horizontal and the $R^2$ score is only about 0.005, so the gain of Weighted Voting (WV) over Majority Voting (MV) on a dataset tells us almost nothing about the gain of UCM over MV on that dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f1eee175990>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 640x320 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# UCM - MV accuracy difference plotted against each dataset's number of attributes.\n",
    "fig, ax = plt.subplots(figsize=(8, 4), dpi=80)\n",
    "ax.set_xlabel('Attributes', fontsize=12)\n",
    "ax.set_ylabel('Accuracy Difference', fontsize=12)\n",
    "ax.set_ylim(-5, 1)\n",
    "\n",
    "ax.scatter(performance_df['attributes'], acc_diff, color='#8c9bb5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: The graph above shows no relationship between the number of attributes and the accuracy difference between UCM and MV."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f1eee175550>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 640x320 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# UCM - MV accuracy difference plotted against each dataset's number of classes.\n",
    "fig, ax = plt.subplots(figsize=(8, 4), dpi=80)\n",
    "ax.set_xlabel('Classes', fontsize=12)\n",
    "ax.set_ylabel('Accuracy Difference', fontsize=12)\n",
    "ax.set_ylim(-5, 1)\n",
    "\n",
    "ax.scatter(performance_df['classes'], acc_diff, color='#8c9bb5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: The graph above shows no relationship between the number of classes and the accuracy difference between UCM and MV."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "R^2 score: -0.08586902513910477\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 640x320 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# UCM - MV accuracy difference against log(number of instances), with the R^2 of a\n",
    "# straight-line fit to these same points.\n",
    "log_instances = np.log(performance_df['instances'])\n",
    "OUTLIER_ROW = 15  # row label of the circled point: 'letter', the dataset with the most instances\n",
    "\n",
    "figure(figsize=(8, 4), dpi=80)\n",
    "plt.xlabel('Log(instances)', fontsize=12)\n",
    "plt.ylabel('Accuracy Difference', fontsize=12)\n",
    "plt.ylim(-5, 1)\n",
    "plt.scatter(log_instances, acc_diff, color='#8c9bb5')\n",
    "\n",
    "# Draw an empty orange ring around the outlier row.\n",
    "plt.scatter(log_instances.loc[OUTLIER_ROW], acc_diff.loc[OUTLIER_ROW], s=800, facecolors='none', edgecolors='orange')\n",
    "\n",
    "# Fit the line on this cell's own x/y. Reusing `slope`/`intercept` from the WV-vs-UCM cell\n",
    "# would score a line fitted to different x values, which is how R^2 can come out negative.\n",
    "# Separate names are used so that earlier fit is not overwritten.\n",
    "inst_slope, inst_intercept = np.polyfit(log_instances, acc_diff, 1)\n",
    "y_true = acc_diff\n",
    "y_fit = inst_slope*log_instances + inst_intercept\n",
    "print(f\"R^2 score: {r2_score(y_true, y_fit)}\")\n",
    "\n",
    "plt.savefig('instances-acc-graph.png', bbox_inches=\"tight\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: There seems to be an increasing relationship between the number of instances and the accuracy difference between UCM and MV. However, there is an outlier (circled)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "R^2 score: 0.11255741672134822\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAEpCAYAAAC9TAmAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAMTQAADE0B0s6tTgAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXxcdb3/8ddn0lKaNklXuqW0LC2CCpSlFi6UgqzuSnAXWkX5KV68FuR6BdmRwkVUhHtVlEUqbi2iolThyq5SqVJBdrAtSTcKTZOSUtrk8/vjO+NMJpNlkjM5Z5L38/GYBzPnnDnzybTAJ9/v5/v5mrsjIiIikkSpuAMQERER6YwSFREREUksJSoiIiKSWEpUREREJLGUqIiIiEhiKVERERGRxFKiIiIiIomVyETFzK41s1Vm5mb2lrjjERERkXgkMlEBlgBHAKvjDkRERETiMyTuAApx9wcAzCzuUERERCRGiUxUesrMFgILM68rKiqmTJw4McaIREREpFgNDQ1vuPuwQucsyXv9mNkq4F3u/kRPrq+trfX6+vrSBiUiIiKRMrMGd68tdC6pNSoiIiIiSlREREQkuRKZqJjZ9WZWD9QC95jZ83HHJCIiIv0vkYmKu5/p7rXuPsTdJ7r73nHHJCIiIv0vkYmKiIiICChRERERkQRToiIiIiKJpURFREREEkuJioiIiCRWWbfQFxEZCNydhg2baWxqYVR1JVMmjNZeZyJpSlRERGLU1LyNJcuWs6W5hYpUita2NmqqKqk7cTbVVcPjDk8kdpr6ERGJibuzZNlyGptaaGtzduxspa3NaWxqYemy5SR5LzaR/qJERUQkJg0bNtPUvK1DQuLubGluoWHD5pgiE0kOJSoiIjFpbGohlSpci5JKpWhsaunniESSR4mKiEhMRlVX0trWVvBcW1sbo6or+zkikeRRoiIiEpMpE0ZTU1XZYYWPmVFTFVb/iAx2SlRERGJiZtSdOJtR1ZVUpIyhQyqoSBmjqys5+aTZWqIsAthAqiqvra31+vr6uMMQESmK+qjIYGdmDe5eW+ic+qiIiMTMzKidOIbaiWPiDkUkcZSoiIgMAhq1kXKlREVEZIBT91spZyqmFREZwNT9VvrK3alf/ypPPFtP/fpX+/3vjEZUREQGsJ50v1VtjHQmCaNxGlERERnA1P1Weispo3FKVEREBjB1v5XeSspeVEpUREQGMHW/ld5KymicEhURkQFM3W+lt5IyGqdiWhGRAa66ajgL6uaqj4oUJTMa19jU0m76p79H4zSiIiIyCGS6375lZi21E8coSZFuJWU0Tnv9iIjIoKauvV3rj+9He/2IiIgUkIQ+IUkX915UmvoREZFBKSl9QqRrSlRERGRAKLbVe1L6hMRu505oaIBHH4Vf/xq++93wGuDll+GQQ+DrX48tPE39FElzmSIiydObKZxMn5DWAitwM31Cynp7gR07YP16WLcu+1i7FubNg2OOCdfMmgUrV0J+UverX8GUKVBdHZKV7dv7PfwMJSrdyE1Mhg6p4OEVz2ouswhK7AYnd6dh/av8s/5lwJheO04rTaRkcqdw3J22tlaAf03hzK+bW/DvXlL6hPTKq6/Cc8+FxCOTgKxbB/vsA+eeG675yEdg6dKO792xo32isueeMHkyTJqU/efBB4fzw4bB6tX98zN1IrGJipnNAG4BxgGNwHx3f7I/Y8jN0FNm7MxJu3v6L8JgpiK1wampeRs/++0jbGnOdq1cvvIFqquG86F3zNGfvUSutxsvJqVPSAebNsH992eTj0wikkrBXXeFa266Cc45p+N7jzsum6i8851hVCQ3AZk8GaZOzV5/442l/3n6KLGJCvBd4HvufrOZ1QE/AA7rrw/vkKFTeK5TO5AW1tvfcKS8Zf7cc5OUjKbmbSy56xEWnHKU/uwlUr2dwsn0CVmybDlNzS2kUina0r9QRdon
ZNu2kGQMGxamUL7znfajIJnnTz4JEyfCM89AXV37e1RWwh57ZF/PmwdXXdU+CZk0CWpqstcsWBBN/DFLZKJiZrsBBwHHpw8tBa4zs+nuvqo/YugsQy9kQMxlRkxbyw9ODRs2F0xSMvRnL6XQlymcPnXtfe21kGRUVoZkAWDRInjiifZ1IY2NcPvt8P73Q0UFfPGL2ZqQkSPDe/ffH15/PRzbbz9YvLj9KEhVFeTGdPDB2emZAS6RiQowFVjr7jsB3N3NbA2wO7Aqc5GZLQQWZl7X5GaSfdRVhp4v8XOZMRjwRWpSUGNTC4ZBJyOQZqY/e4lcX6dwOvQJaW7OjnYMHQr/9m/h+Le/HRKOTALS1BSOn3suXHlleP6LX8Dy5WFkY9KkUAMyeTJMmBDODxkCDz4I48eH81VVHQMaPRo+9rG+fCUDSlITFej4X7oO6a27XwNck3ldW1sb2aL3rjL0dkHFPZeZUGVdpCa9Nqq6sstRSHfXn71ErkdTOO4hscgtPm1uhs9+Ntzk5z+H884Lx197LXvzI44IiQXACy+EFTKTJsHs2dnRjqOPzl7/q1+F5KOyi7/nmcRHeiSpicpLQK2ZDXH3nRbG4KYCa/orgM4y9IyhQypKM5c5QCS2SE1KasqE0dRUV7J5y2sFz+vPXvqi4CpCgC1bqF67lgWjd7B5/SreWFPPiMZXGPm1S7GRw+Gxx+Dww0OtSK4hQ+CMM0L9yC67hMfhh7ev+5g5M3v9NdfAN7/ZdZCZkROJTGL3+jGz+4Cbc4ppz3H3OV29J+q9fjKrVnIz9OqRwzni0H14Y0erltt2o9D3l0nsqkdq5cdAVWjVD0BN1XA++M45+rOX4jzzDKxezbYXV/OPB/9CxYYNjNzyKo8d/S6aD51D3fGHUD22BlpbO753+XI49NAwgrJgQfui08zzt70tJCoSq672+klyorIPcDMwFmgCTnP3f3T1nlJsSqg+IH2j729wUh8V6ZR7tih05Ur461879gJ597vhK18J1xx+OPzpT+1vYcbdH/88Txx1EqOrK5n/p19iI0YUTkSGJHXiQHKVZaLSG9o9WUQkJm1t8MoroUgUwlLbpUvbJyBr14bmYvffH64555z2rdlTKdhtN5g/H664Ihz76U/ZvLqBB+ubaKoezWs1o3mtejReUQFARcqoe8fbVKBd5rR7soiI9E5rK2zcmO16OmJEeH7xxe2TkA0bQrLyxhthCe5TT8EFF4R7VFSE2o3Jk8M9Mk49NfQDyYyC7LZbxxGQD32IhmfrWfXHf7BjZ8fpHa0iHPiUqIgkgKbIpN/t3BmSi0yisd9+sPfe4dzJJ8OqVe0TEAhTMHPmhNff/W5IKiZNCt1PDz00PN++Pax4OeaYMK0zeTKMGxeSlXz77x8e3dAqwsFNiYpIP+ksGdFWAxKp3I3oMknItGnwjneE86efDnfeGUZJcqf+r7kmNCID+Pvfw32mTQuJSW7NB4R/btwIY8d2Xog6enR4RECrCAc3JSoi/aCzZOTkEw5l6e/+oq0GpHs7dnQsOl23LrRl/+pXwzUXXwwXXdTxvaeckk1URoyAvfYK/UFyE5DDcnYoefbZ9l1Q86VS2VqUftBvre4lkVRMK1Ji7s5NSx4o+NvgyBHDeO217bQV+PdQRYKDyPbtsGJF4UTkzjtDd9Q77ggt2PPV1sJLL4XnS5bAz37WcSfcPfcMjzKnKdKBS8W0IjHqat+jra9tJ5UyaO2YqKhIsMzlLsO94w5YvbpjInLjjaGPR2Nj4W6lI0aElTQTJ8Kb3xymZvJ3ws1Mx0DYyC5/M7sBpEOrexkUlKiIlFhX+x5VpExFguVm69awedy4ceH1kiXwyCMdR0LOPz8svwVYuBD++c/sPaqqQoLRkm6KN348XH11xz4gufvAzJgR6khEBhklKiIl1uWKBXdGVu7K1pbtKhKMk3vY9yWz0dyhh4bjd90Ft97aPglpboYPfCD0CIGwCd1tt4XnNTUhwTj4
4PYjHd/7XmjPnklCRo5s//mpFJx9dul/TpEypERFpMS6WrEwqqqSD6QLalUkWALusGVL+0Tjgx8MScNf/xqmUjLHMxvRTZwYXkPYhO7HPw6rVyZPDtM0kyeHlTAZl18Ol1wSEpDONqI79tjS/pwiA1jRxbRmNhnYE/iLu28vSVS9pGJaSaru9j1SkWCR3GHz5o7TLSefHHqBNDfDgQeGc6+/3v69q1aFZbcrVsAJJ7Sv9Zg0KRSnnnlmuHbbtlBnsuuu/f4jliv9XZbeiKSY1szGAIuBEwEHZgAvmtl3gc3u/uUoghUZiKqrhrOgbm6n/wFXkWCOxsawiiW/9frb3w7ve1+45q1vhX8U2Ppr2rSQqIwcGUZBZs7smIiMSX/HBx8MmzZ1Hctw9bEphnoCSSkUM/VzLdAK1ALP5BxfAnwLUKIiiZDU3+iUjBCSjscey46AZBKRvfeGq64K15x1VqgLyWeWTVTe+c4w/ZK/+iXTnt0MHn20f34mAcK/d0uWLVdPIIlcMYnKCcBR7r427y/b88C0SKMS6SX9RtfP2tpCUmAGL78Mv/xlxyW4O3bA3/4Wrr/jDvjc59rfI5WC44/Pvn7f+0LPj/wVMLvtlr3myitL/7NJUbpahr+luYWGDZsHd5IuvVZMotLZtVOArRHEItIn+o0uQjt3hhbpVVXhsXNnKBrNn47ZsAGeeSZ0Om1ogE9/OnuPiopQmDplSkhoUik4+uiwAiZ3FGS33drvA/OBD4SHlJWuluGrJ5D0RTGJym+BL5vZ/PRrN7NxwBXAr6IOTKRY+o2uB3bsCMnFunVhKe3MmeH4RReFqZJMArJxY0gubr0VPv7xkEgsWhQKU4cODQnI1Kkwe3b23jNmwG9+k01Axo/vuA/Mm94UHjLgaONAKZViEpUvEOpR6oHhhMRld+Bx4EvRhyZSnEH9G90bb4SN6DKjHbvsEuo4AL7xDbjllnD85ZezG9GdeSZcd114/sAD8PDDIcGYPh0OPzw832uvcN4sLOcdPz4UoxbaiG7EiOx+MjLoaONAKZUeJyruvgmYZ2ZHAm8FRgIrgd/7QNowSMrWgPyNbvv29gnI2rXw6qtwwQXh/JIl8NnPdly9ctBB2USluTk0Mdt7b5g7Nzvt8ra3Za+/886wwqWrqbF99432Z5MBRRsHSqloU0Ipmf5efdPV5n+jqyuTVaPiHlqq569+WbsWLr0Udt8dnnwy7O9SyLZtobfHvffChRe2LzqdNAn22CPsjivSz5K66k6Sras+Kj1OVMzsOuApd78+7/iZwEx3/0KfI+0jJSrJEdfqm+4aq/WbFSvg+ec7NiQ76yx4z3vCNVVVYd+YfPfdB0cdFfqJfPazHVe/TJoU6kFyC1BFRMpYVInKOuAkd38s7/gBwF3uPrnPkfaREpVkiHtkoyS/0W3fDsOGhefLl8NDD7VPQNatC1MtV18drjn+eLj77vb3qK4OvULOOCO8XrSo/f4vmSQkdyM6EZFBIJLOtMAoYFuB468DqpKSf4l79U2PG6vlbkSXaRT2xBNw000dp2P23BNWrgzX3HlnmJ7JGDUqJBi5XUzPPhs+9an2CciIEe0//8vqkSgi0p1iEpWVwMeAC/KOfwJ4IrKIpOzFvvrGPUybZJKMgw8O7dQ3boR///f2SUhLS3jP1q0hkaivh2uuCcfGjAlJxmGHtV9Se9pp2T1i8hOUjBNOKN3PJyIyiBSTqHwVuNPMDgTuSx+bBxwPvDvasKSclWz1jXtY8ZI73XLAAeEBYWns00+H47kb0d1zT9gnZtgw+NnPYOzYkIAccUR2tKM1NIfjyCNDkevEiZ1vRLfXXtlluyIiUlLFLE++28xmAecCHwWM0ENllrs/VaL4pAwV3U/BHV55pX3Nx9q1oZ/HRz8arjntNPjJT0K/kFyXXppNVJqbQx3IPvu03wNmxoxwvro6JDCZWpNCRozoOEUjIiKx0fJkKYnM6puWTa8y5uV1VG7exLjXt3LI
+OHsuullGDIErr02XHzppdm+ILne8Y7Q6RTgkktCEWv+Trj77x+W4oqISNmKZNVP+kZDgJnAbkC71pTu/oe+BBkFJSr9bPv2MK2Svwnd2rXwf/+HV1Wx6We3M/7DdR3fO358qBkB+P3vYenSjjvhTp3afiM6EREZkKJannw0cCtQaBmyu3vsTR2UqPTRzp1hL5hMcejixdleILmJyC23wLHHZqdacg0ZEuo7HnoIpk0LG9X94AftE5DJk0OiMkj7gKghlohIe1EtT74e+A1wobuvjyQy6R87doQ27Dt3ZqdJbrstdDXNHQXZuDF0Ob3wwnDNlVeG5boQNqLLjHJk9nmpqgpLeSdMyCYh48a13wdmypTC0zqDVFyN8EREylUxIypbgQPc/YXShtR7g25EJbMPzLp1YS+X448Px++8E66/PpuAbNoUClZPOAGWLQvXfOYzcMMNobA0U+8xaRLU1cFHPhKuefTR7PmxY7veB0a6FXcjPBGRpIpqROW3wBwgsYnKgLF9e8fplvnzYeRIeOwx+MQnwrFXXsm+p7oatmwJzzduDG3YJ0+GmTNDO/bJk+HAA7PXX3556Iw6enTnCcghh5TqJxyU4m6EJyJSjopJVP4MXG1mcwgN3nbknnT3G6MMbEDatq1j0em6dWEE44ADQoIyeXLoFZLv6KPDBnWVlWGJ7X77ddwDxj0kHaedBgsWdD0CMn586X5OKSj2RngiImWomETl3wnt8t+VfuRyYHAnKg0N8MILHRORY48NoyEQOpxm2rDnmjEjJCrDhsGcOVBT07H4dPr0cO3MmfDcc13HMkiLVJOuZI3wREQGsGIavvVLswoz+yTwRWBf4D/c/br++NwurV4NDz/cviNqZv+XH/wgXHPZZfCd73R8b01NNlH52MfguOPaj4RkClQzMn1DZMApuhGeiIgUNaICgIVqvynAOndvjT4kVgAfBP6rBPfunfvvD9MpuUaPbr80t66uY0fUSZPCVE3Gl77UP/FKIpkZdSfOZsmy5TQ1t5BKpWhLr/o5+aTZKqQVESmgmFU/w4GvA58kJDgz3f1FM/tvYK27fyPSwMxuBh4tZkSlZKt+1qxp3xV14sTCG9HJoNDXPijqoyIi0l5Uq36uAmYBxwC/zzn+AHAJEGmi0hNmthBYmHldU1NTmg/afffwkEEvij4oZkbtxDEqnBUR6YFU95f8y/uBz7v7HwnFsxlPAnv39CZm9qCZberkMbX7O2S5+zXuXpt5jBw5spi3ixTF3VmybDmNTS20tTk7drbS1uY0NrWwdNnyDsuORUSk74oZUakBthQ4Ppq8pcpdcfcji/hMkcRQHxQRkf5XzIjKvcAZOa/dzHYBzgfujjQqkQTK9EEpJNMHRUREolVMovIfwHvNbDkwDPgeoUvtW4FzogrIzD5uZvXAKcClZlZvZrOiur9Ib6kPiohI/yumj8qLZvZm4MOE5GQksAT4kbtvjSogd18MLI7qfiJRUR8UEZH+16NEJT3FczXwdXe/tbQhiSST+qCUjpZsi0hniumjsgWY5e4vljak3ht0uydLLPQ/1WhFseRbRMpbV31UiqlR+SlQF01IIuUr0wflLTNrqZ04RklKH2jJt4h0p5jlyVuA88zsBOBvQLslDu5+QZSBiUhxynGkR0u+RaQ7xSQqhwB/JYzCHJx3Tr/2iMSoXKdPMku+Wwsspsos+VaiIjK4FbPq5+hSBiIivZM7feLutLWFvUIz0yfz6+YmdmRFS75FpDvF1KgAYGaTzewIMxtWioBEpDg9mT5JqsyS7/xESku+RSSjx4mKmY0xs98C9cD9wJT08e+a2aISxSci3SjnjrmZJd+jqiupSBlDh1RQkTJGV2vJt4gExdSoXAu0ArXAMznHlwDfAr4cYVwi0kPlPn1SXTWcBXVzy64QWET6RzGJygnAUe6+Nu8/IM8D0yKNSkR6bCB0zM0s+VbhrIjkK6ZGpbOkZgoQWQt9ESmOpk9EZCArpjPt
jwhTP/MJPVX2B5qBXwBPu/unSxRjj6kzrQxm5dhHRUQEuu5MW0yiMo5QjzIT2A14DtgdeBw40d0bowm395SoiIiIlJ+uEpVi+qhsAuaZ2ZFkd09eCfze1edaRERESqDLRMXMWoFJ7r7RzP4AfMDdHwQe7JfoREREZFDrrph2KzA2/XweMLSk0YiIiIjk6G7q53fAvWb2VPr1L8zsjUIXuvsxkUYmIiIig153icrHgQ8BewFHAcuB10odlIiIiAh0n6h8GPipu283s2nAhe7e3A9xiYiIiHRbo3IjUJN+fiqQ3P3iRcqIu1O//lWeeLae+vWvdthQUEREgu5GVNYCJ6c3IzSg1sx2LXShu6+JOjiRgaipeRtLli1nS3MLFakUrW1t1FRVUnfibKqr9LuAiEiuLhu+mdkC4HpgWFf3ANzdKyKOrWhq+CZJ5+7ctOSBgvvyjK6uZH7dXHWTFZFBp9cN39z9JjNbTNjP50XgMGBT9CGKDA4NGzbT1Lytw1SPu7OluYWGDZu1MZ+ISI5uO9O6+w5glZkdDaxw952lD0tkYGpsaiGVMlrbOp5LpVI0NrUoURERydFdZ9pjgAfSyUkFMLezYWl3/0P04YkMLKOqK2ltK5ClAG1tbYyqruzniEREkq27EZV7gInAxvTzzjghkRGRLkyZMJqaqsqCNSo1VWHHYxERyepyebK7p9x9Y87zzh5KUkR6wMyoO3E2o6orqUgZQ4dUUJEKhbQnnzRbhbQiInm6XPVTbrTqR8qFu9OwYTONTS2Mqg4jKUpSRGSw6vWqn5wb1ABfBt4DTCdM9awC7gCuVLdakeKYGbUTx6hwVkSkG90mKmY2AvgjYRfl24CnCb1T3gScDrzbzA5z95ZSBioiIiKDT09GVL5IqGXZz91fzT1hZpcBDwFfAK6IPjwREREZzLrb6wfCdM9l+UkKgLu/AlwOvD+qgMzsa2b2lJmtNLPl6SXSIiIiMgj1ZERlJvBIF+cfSV8TlQeBS919m5kdANxnZpPc/fUIP0NERETKQE9GVKqApi7ONwEjowkH3P0ud9+Wfvk4oT/LuKjuLyIiIuWjJyMqBkwzs86SkfHpa0phAfCCuxdcc2xmC4GFmdc1NTUlCkNERETi0G0fFTNrIyxH7vQSitg92cweBPbt5PQsd38pfd3bgZuA49z9mZ7cW31UREREyk9f+6jsEWUw7n5kd9eY2VGEJOXdPU1SREREZODpye7Jq/sjkAwzmwvcCrzX3Vf252eLiIhIsvSoM20/+wEwDLgpp6X4J9z98fhCEhERkTgkLlFx9xlxxyAiIiLJ0JPlySIiIiKxUKIiIiIiidXjRMXM/mBmp3XRT0VEREQkUsWMqDwMXAisN7PFZnas5VS7ioiIiEStx4mKu3/V3fcE3gG0AD8HXjKzRWb25lIFKCIiIoNX0TUq7v6Au38GmAh8E/gi8Hcze9TMPmVmqnsRERGRSBS9PDldo1IHnAocAdwD3AJMAc4HTkqfFxEREemTHicqZnYi8AngvcAaQnLyMXdfl3PNPcDyqIMUERGRwamYEZXbgJ8Cb3f3Rzq55jngij5HJSIiIkJxicokd9/e1QXuvg24uG8hiYiIiATFFL5+zMw61J6YWZ2ZzY8uJBEREZGgmETlK8DLBY6vT58TERERiVQxiUotoYg2XwMwNZpwRERERLKKSVReAo4scHwusDaacERERESyiimm/TZwrZmNBe5PH5sHXEBorS8iIiISqR4nKu5+rZltA84Dvp4+vAY4x92/X4rgREREZHArqjOtu98A3JDuTmvu3lyasERERER60UIfwN23Rh2IiIiISL5iWuingM8Q9vGZCgzNPZ/eWVlEREQkMsWs+rkI+CpwN7A7Ya+fe4Fq4PrIIxMREZFBr5ipn1OBT7r778zsfOBH7v68mf0FOLY04YmIiMhgVsyIyjjg6fTzLcCY9PPfASdEGZSIiIgIFJeoPAfslX7+D2C+mVUDHwY2Rx2Y
iIiISLEN3/ZIP78YuBM4A9hBKLIVERERiVQxDd9uzHn+RzPbHdgHWOPuhTYrFBEREemTHk39mNkuZtZoZvtljrn7VndfoSRFRERESqVHiYq7vwFsIq93ioiIiEgpFVNM+1XgKjObWqpgRERERHIVU0x7FWGJ8ioz2wy05J50992jDExERESkmETl/JJFISIiIlJAMat+billICIiIiL5itmUsMtNB939xb6HA2Z2OfAeoDV96Ap3/2kU9xYREZHyYu7eswvN2gAHLH2o3RvdvSKSgMxGuXtj+vlkQtv+ae7ebffb2tpar6+vjyIMERER6Sdm1uDutYXOFVOjskfe66HA/sB5wIW9jK2DTJKSVkVIiIpZnSQiIiIDRDE1KqsLHH7ezF4Bria01I+EmZ0FnAnUEnZsfqWT6xYCCzOva2pqogpBREREEqDHUz+d3sDszcCf3b2qh9c/COzbyelZ7v5SzrUHAIuBeZ0lK7k09SMiIlJ+Ipn6MbNj8g8BE4EvAMt7eh93P7KIa1eaWQMwD1ja0/eJiIjIwFBMjco9ea8deBl4ADg7qoDMbF93fyr9fC9gFvBkVPcXERGR8lFMjUp/FbQuMrO9gR3ATuDzmcRFREREBpdiRlT6hbu/N+4YREREJBl6PEpiZreb2ZcKHD/bzJZEG5aIiIhIcf1JjgJ+W+D4XelzIiIiIpEqJlHZtYt7jIggFhEREZF2iklU/gycVeB4UcuTRURERHqqmGLac4F7zGwOcH/62Fxgd+DYqAMTERER6fGIiruvAGYAdwCTgSnp5zPd/a+lCU9EREQGs6KWJ7v7JiLcgFBERESkK8UsT/6kmdUVOF5nZvMjjUpERESE4oppv0JomZ9vffqciIiISKSKSVRqgTUFjjcAU6MJR0RERCSrmETlJaDQzsdzgbXRhCMiIiKSVUwx7beBa81sLNnlyfOAC1CBrSScu9OwYTONTS2Mqq5kyoTRmFncYYmISDeK2T35WjPbBpwHfD19eA1wjrt/vxTBiUShqXkbS5YtZ0tzCxWpFK1tbdRUVVJ34myqq4bHHZ6IiHShmKkf3P0Gd58OVAM17j5dSYokmbuzZNlyGptaaGtzduxspa3NaWxqYemy5bh73CGKiEgXijhJ5xAAAAyuSURBVEpUMtx9q7s3m9mE9O7Jf486MJEoNGzYTFPztg4JibuzpbmFhg2bY4pMRER6ouhExcyGmdkHzew3hALb04BfRh6ZSAQam1pIpQrXoqRSKRqbWvo5IhERKUaPa1TM7HBgPnAKYUnym4Dj3P3e0oQm0nejqitpbWsreK6trY1R1ZX9HJGIiBSj2xEVMzvfzJ4Ffkho+DbX3d8COLChxPGJ9MmUCaOpqarssMLHzKipCqt/REQkuXoy9XMRYWrnLe5+nrs/XtqQRKJjZtSdOJtR1ZVUpIyhQyqoSBmjqys5+aTZWqIsIpJwPZn6OR34BLDezH4N/Bj4fUmjEolQddVwFtTNVR8VEZEyZD1dnmlmuxMSllOBccAoQiHtj929tWQRFqG2ttbr6+vjDkNERESKYGYN7l5b6FyPV/24+xp3v9zd9wHeDdwAXAtsNLNboglVREREJKu3fVT+6O7/D5gIfBYYG2lUIiIiIvQyUclw9zfc/Wfu/q6oAhIRERHJ6FOiIiIiIlJKSlREREQksZSoiIiISGIpUREREZHEUqIiIiIiiaVERURERBJLiYqIiIgkVmITFTObZ2atZvb5uGMRERGReCQyUTGzKuBK4K64YxEREZH4JDJRAa4B/hvYFHcgIiIiEp/EJSpmdhIwyt2XxB2LiIiIxGtIf3+gmT0I7NvJ6VnAIuC4Ht5rIbAw87qmpqbP8YmIiEhymLvHHcO/mNkRwO1AS/rQOGA7cJ27X9jd+2tra72+vr6EEYqIiEjUzKzB3WsLnev3EZWuuPtDwG6Z12Z2M/Cou18XW1AiIiISm8TVqIiIiIhkJGpEJZ+7z487BhEREYmPRlRE
REQksZSoiIiISGIpUREREZHEUqIiIiIiiaVERURERBJLiYqIiIgklhIVERERSSwlKiIiIpJYSlREREQksZSoiIiISGIpUREREZHEUqIiIiIiiaVERURERBJLiYqIiIgklhIVERERSSwlKiIiIpJYSlREREQksZSoiIiISGIpUREREZHEUqIiIiIiiaVERURERBJLiYqIiIgklhIVERERSSwlKiIiIpJYSlREREQksZSoiIiISGIpUREREZHEUqIiIiIiiaVERURERBJLiYqIiIgklhIVERERSSwlKiIiIpJYiUtUzOxmM6s3s8fSj/+OOyYRERGJx5C4A+jEIne/Lu4gREREJF6JG1ERERERyTB3jzuGdszsZmAusBVYA5zv7o91cu1CYGHOoYnA+ohCGZmOQfpO32V09F1GR99ldPRdRmewfpfj3X1YoRP9nqiY2YPAvp2cngW0Aevcvc3M3g/8DzDD3fv1D87M6t29tj8/c6DSdxkdfZfR0XcZHX2X0dF32VG/16i4+5FFXPsLM1sE7AOsKF1UIiIikkSJq1Exs9qc53OAscDz8UUkIiIicUniqp+bzWwC0ApsA05x9y0xxHFNDJ85UOm7jI6+y+jou4yOvsvo6LvMk7hiWhEREZGMxE39iIiIiGQoUREREZHEUqLSCTO70MzczN4SdyzlysxWmdnTOdshfCjumMqVmQ0zs+vM7Dkz+4eZLY47pnJkZqNy/j4+ZmbPmtlOMxsTd2zlyMxOMLMVZvY3M3vCzE6LO6ZyZWYnmtmjZvZ3M/uzmR0Qd0xJkcRi2tiZ2UHAHELDOembOnd/Iu4gBoBFhB5DM93dzWxS3AGVI3dvBA7MvDazc4Cj3P3V+KIqT2ZmwG3A0e7+dzObDjxtZre7e3OswZUZMxsNLAaOdPenzOwo4EeAflFGIyodmNkw4Hrgc4AqjSV2ZjYCWAB8xdPV7+6+Lt6oBowFwA/iDqLMjUr/sxp4BdgeYyzlai9go7s/BeDu9wPT0r80D3pKVDq6BFjs7v+MO5AB4kdm9riZfd/MxscdTJnai/A/gPPTQ8MPmtnb4w6q3JnZYYQ+TXfGHUs5SifNHwRuN7PVwEPAae7+RryRlaXngPHp3mGku7KPBKbHGVRSKFHJkf4P16GEtv3Sd3Pd/QDgIML/aG+JOZ5yNRTYE3jS3Q8BPg/8RIlfn30S+KG774w7kHJkZkOA/wLe6+7TgLcDt6jep3jpXmEnA4vMbAUwD3gS2BFnXEmhPio5zOzLwFlA5jeCWmADcLq73xVbYANAuqbiWXevijuWcmNm4wh/D3dx99b0seXAue5+X5yxlav0dNo6YLa7Px13POXIzA4hJHr75Rz7C+Hv5b3xRVb+0iUI64FD3X3Qd2bXiEoOd1/k7pPdfbq7TwfqgROUpBTPzEaY2aicQx8B/hZXPOXM3TcB/wecAGBm04A9gGfijKvMnQL8XUlKn7wE1JrZPgBmtjdhmvLZWKMqU3kF8l8F/qAkJdCqHymVCcBSM6sADHgRODXekMra/wNuNLMrCdtLfEYFtX3yKVRE2yfuvsHMzgCWmFkb4d/zz7l7Q8yhlatLzewIwv+X/0T4Oypo6kdEREQSTFM/IiIiklhKVERERCSxlKiIiIhIYilRERERkcRSoiIiIiKJpURFREREEkuJiohEwswOMbNn0r1zMLObzWxx3HHFwcymmVmDmY2MOxaRcqdERWSQMLP7zOyyEn7EZcBVmTb/wBeAM6O4sZldZmb3RXGv/uDuqwndhP8j7lhEyp0SFRHpMzPbC5gL/DxzzN23pDdbG6wWA58xM/13VqQP9C+QiGBmp5rZ82a23cweN7OT8s5/1MzWmNlrZnaLmV2dN8JRBzzs7k0572k39WNmq8zsbDP7efo+T5nZMTnn9zSzZWbWlH48YmZ7m9l84DzgKDPz9GO6mU0wsyVmtt7Mms3sATM7MOd+09PXvs/Mlqc/8z4z2z3vZzsr52d/0cxO
zzl3UPo929LxX5zeNRgLrkhP8byefu8ZObf+AzAOmNOrPxQRAZSoiAx6ZnY4cCNwLbA/8AvgDjObnj6/D/BD4H+Bgwibzn0m7zb/Bvy1Bx93LvBr4EDgQWCxme2SPncdYZfoQ4FD0vG0AT8FvknY/2RS+vESMBx4ADgOOBh4EviVme2a95kXAf8JzAYqgW/k/OyfJkxZXQ7sR9hfpSl9bixwN/Bb4K3AfOCjwNnpt5+Sfv1BYJ/0ezdk7u3uO4GV6e9GRHpJmxKKyFnA7e5+bfr1BWZ2PKG+5EvA6cCf3f2K9PnLzeydefeYRhhB6M5Sd/8hgJldCHwamAk8AUwFfuzumV2h/7ULr5m9Brzh7utz7rWKkMxkrjkT2EJISB7Iue5r7n5v+ppvAN/OOXc+cIm735R+/ULOuTOBe939qvTr59MxXwJcmY73OeCPHjZNW13g511H+G5EpJc0oiIi+wB/zjv2p/RxgBnAirzzj+a93hXY3oPPejzneSbp2C39z/8Bvm9mvzOzc8xsalc3MrOhZva19BRSIyFJqSQkEF195lgzqzCzKmB34L5OPuKtwHvMbGvmQdhxeXq67mQpYRTmKTP7hpkdVeAe2wgjPyLSS0pURMR6cL67bdZfAUb14LN2ZJ54duv2VPr1/wL7EqZaTgSeNrMju7jXfwKnEUZFjiBMJzUCQzv7TLI/h9H9zz0S+En6vpnHW4E3uXubu68iJHHnp6/9tZl9O+8eY4BN3XyOiHRBiYqIPE3Hgs/D0schTMEcnHc+//VK4E19DcTdX3T3b7n7scD9wEfSp3YAFXmXzwF+7u5L3f0JwojO6CI+qwlYA8zr5JKVwH7u/nz+I+cer7n7Enf/NGGK7FN599gvfR8R6SXVqIgMLhNyV8ak/Q/wBzP7PPB74OPALODD6fPfB75oZv9JKLT9AGFkIXc66G7gKvogXT/yG+B5wvTN/sDv0qdXA/uY2ZsIIxSvEupJTjSzg9LXXA28XuTHXgZ83cw2EepapgAT3f3nwPXAGWZ2A6HQ93XgAGCmu19mZqcRRmUeAVqB9wGZ+hrMrDZ9v3uLjElEcmhERWRwOR34W94jBXyS0JzsCeD9wPvSUxuki1tPAz6fvn4/4Fba16TcCdSY2aw+xDYU+B5hJOfHwG2EBAFgCbAc+AvwMqG25DLgn8BDhHqR7xGmoHrM3W8ALkw/ngJuAqrS514i9IaZCjyc/uxzCKMwEGpiPpeOazlhmufDObc/Bfidu68rJiYRac+y08QiIj1jZvcAz7j7mTnH/guY4e6fjC+yZEgX2z4FfMrdH4o7HpFyphEVEemWmZ1pZrPMbIaZnQccA/wo77JvEZbw5teSDEaTgWuVpIj0nUZURKRbZva/hNqUKkJx7cXu/ot4oxKRwUCJioiIiCSWpn5EREQksZSoiIiISGIpUREREZHEUqIiIiIiiaVERURERBJLiYqIiIgk1v8HsDcOStCMXVsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x320 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "nolet_df = performance_df.drop(15) # drop the letter dataset\n",
    "nolet_acc_diff = nolet_df['ucm_acc'] - nolet_df['mv_acc']\n",
    "\n",
    "figure(figsize=(8, 4), dpi=80)\n",
    "plt.xlabel('Log(instances)', fontsize=12)\n",
    "plt.ylabel('Accuracy Difference', fontsize=12)\n",
    "plt.ylim(-5, 1)\n",
    "plt.scatter(np.log(nolet_df['instances']), nolet_acc_diff, color='#8c9bb5')\n",
    "\n",
    "slope, intercept = np.polyfit(np.log(nolet_df['instances']), nolet_acc_diff, 1)\n",
    "x_min = np.log(nolet_df['instances']).min()\n",
    "y_min = slope*x_min + intercept\n",
    "x_max = np.log(nolet_df['instances']).max()\n",
    "y_max = slope*x_max + intercept\n",
    "plt.plot([x_min, x_max], [y_min, y_max], color='red', linestyle='--')\n",
    "\n",
    "y_true = nolet_acc_diff \n",
    "y_fit = slope*np.log(nolet_df['instances']) + intercept \n",
    "print(f\"R^2 score: {r2_score(y_true, y_fit)}\")\n",
    "\n",
    "plt.savefig('instances-acc-no-let-graph.png', bbox_inches=\"tight\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comment**: If we drop this outlier, the line of best fit seems much better and the $R^2$ score is also higher."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}