ki-dhbw/Aufgaben/01 - linear regression - 1 ...

1104 lines
161 KiB
Plaintext
Raw Permalink Normal View History

{
"cells": [
{
"cell_type": "markdown",
"id": "9496e038",
"metadata": {},
"source": [
"# Lineare Regression mit 1 Feature ($d=1$)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5754d665",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"# plotting settings\n",
"pd.plotting.register_matplotlib_converters()\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns"
]
},
{
"cell_type": "markdown",
"id": "282549b7",
"metadata": {},
"source": [
"Wir verwenden hier beispielhaft den Datensatz [Melbourne Housing Snapshot](https://www.kaggle.com/datasets/dansbecker/melbourne-housing-snapshot). Diesen finden Sie auch im Moodle unter `data/melb_data.csv`."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cfe20800",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',\n",
" 'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',\n",
" 'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',\n",
" 'Longtitude', 'Regionname', 'Propertycount'],\n",
" dtype='object')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"melbourne_file_path = 'data/melb_data.csv'\n",
"melbourne_data = pd.read_csv(melbourne_file_path)\n",
"melbourne_data = melbourne_data.dropna(axis=0) # entfernen von Daten mit fehlenden Werten\n",
"melbourne_data.columns # Spaltennamen der Tabelle (potentielle Features)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e13b23ac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Suburb</th>\n",
" <th>Address</th>\n",
" <th>Rooms</th>\n",
" <th>Type</th>\n",
" <th>Price</th>\n",
" <th>Method</th>\n",
" <th>SellerG</th>\n",
" <th>Date</th>\n",
" <th>Distance</th>\n",
" <th>Postcode</th>\n",
" <th>...</th>\n",
" <th>Bathroom</th>\n",
" <th>Car</th>\n",
" <th>Landsize</th>\n",
" <th>BuildingArea</th>\n",
" <th>YearBuilt</th>\n",
" <th>CouncilArea</th>\n",
" <th>Lattitude</th>\n",
" <th>Longtitude</th>\n",
" <th>Regionname</th>\n",
" <th>Propertycount</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Abbotsford</td>\n",
" <td>25 Bloomburg St</td>\n",
" <td>2</td>\n",
" <td>h</td>\n",
" <td>1035000.0</td>\n",
" <td>S</td>\n",
" <td>Biggin</td>\n",
" <td>4/02/2016</td>\n",
" <td>2.5</td>\n",
" <td>3067.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>156.0</td>\n",
" <td>79.0</td>\n",
" <td>1900.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.8079</td>\n",
" <td>144.9934</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Abbotsford</td>\n",
" <td>5 Charles St</td>\n",
" <td>3</td>\n",
" <td>h</td>\n",
" <td>1465000.0</td>\n",
" <td>SP</td>\n",
" <td>Biggin</td>\n",
" <td>4/03/2017</td>\n",
" <td>2.5</td>\n",
" <td>3067.0</td>\n",
" <td>...</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>134.0</td>\n",
" <td>150.0</td>\n",
" <td>1900.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.8093</td>\n",
" <td>144.9944</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Abbotsford</td>\n",
" <td>55a Park St</td>\n",
" <td>4</td>\n",
" <td>h</td>\n",
" <td>1600000.0</td>\n",
" <td>VB</td>\n",
" <td>Nelson</td>\n",
" <td>4/06/2016</td>\n",
" <td>2.5</td>\n",
" <td>3067.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>120.0</td>\n",
" <td>142.0</td>\n",
" <td>2014.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.8072</td>\n",
" <td>144.9941</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Abbotsford</td>\n",
" <td>124 Yarra St</td>\n",
" <td>3</td>\n",
" <td>h</td>\n",
" <td>1876000.0</td>\n",
" <td>S</td>\n",
" <td>Nelson</td>\n",
" <td>7/05/2016</td>\n",
" <td>2.5</td>\n",
" <td>3067.0</td>\n",
" <td>...</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>245.0</td>\n",
" <td>210.0</td>\n",
" <td>1910.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.8024</td>\n",
" <td>144.9993</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Abbotsford</td>\n",
" <td>98 Charles St</td>\n",
" <td>2</td>\n",
" <td>h</td>\n",
" <td>1636000.0</td>\n",
" <td>S</td>\n",
" <td>Nelson</td>\n",
" <td>8/10/2016</td>\n",
" <td>2.5</td>\n",
" <td>3067.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>256.0</td>\n",
" <td>107.0</td>\n",
" <td>1890.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.8060</td>\n",
" <td>144.9954</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Suburb Address Rooms Type Price Method SellerG \\\n",
"1 Abbotsford 25 Bloomburg St 2 h 1035000.0 S Biggin \n",
"2 Abbotsford 5 Charles St 3 h 1465000.0 SP Biggin \n",
"4 Abbotsford 55a Park St 4 h 1600000.0 VB Nelson \n",
"6 Abbotsford 124 Yarra St 3 h 1876000.0 S Nelson \n",
"7 Abbotsford 98 Charles St 2 h 1636000.0 S Nelson \n",
"\n",
" Date Distance Postcode ... Bathroom Car Landsize BuildingArea \\\n",
"1 4/02/2016 2.5 3067.0 ... 1.0 0.0 156.0 79.0 \n",
"2 4/03/2017 2.5 3067.0 ... 2.0 0.0 134.0 150.0 \n",
"4 4/06/2016 2.5 3067.0 ... 1.0 2.0 120.0 142.0 \n",
"6 7/05/2016 2.5 3067.0 ... 2.0 0.0 245.0 210.0 \n",
"7 8/10/2016 2.5 3067.0 ... 1.0 2.0 256.0 107.0 \n",
"\n",
" YearBuilt CouncilArea Lattitude Longtitude Regionname \\\n",
"1 1900.0 Yarra -37.8079 144.9934 Northern Metropolitan \n",
"2 1900.0 Yarra -37.8093 144.9944 Northern Metropolitan \n",
"4 2014.0 Yarra -37.8072 144.9941 Northern Metropolitan \n",
"6 1910.0 Yarra -37.8024 144.9993 Northern Metropolitan \n",
"7 1890.0 Yarra -37.8060 144.9954 Northern Metropolitan \n",
"\n",
" Propertycount \n",
"1 4019.0 \n",
"2 4019.0 \n",
"4 4019.0 \n",
"6 4019.0 \n",
"7 4019.0 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"melbourne_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8680d0c9",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='BuildingArea', ylabel='Price'>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHACAYAAACMB0PKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABziElEQVR4nO3deXhTZdo/8G+SZu1OQoFiC4VUCpSlWEBoiwq8AgIiMo5TmfmVZXSUTUcdAR0ERAXcxgHUd0Yp4jsKM46CAu6gQgGVpexbC5WiLKWlTZc0+/n9URKSZmshbdL2+7kurkvOOTl5cgRy93nu+35EgiAIICIiIgpB4mAPgIiIiMgbBipEREQUshioEBERUchioEJEREQhi4EKERERhSwGKkRERBSyGKgQERFRyGKgQkRERCGLgQoRERGFLAYqREREFLJaTaCyfft2jB8/HvHx8RCJRNi4cWOj7yEIAl555RXcfPPNkMvl6Ny5M1544YXAD5aIiIgaJCzYAwiUmpoa9OvXD9OmTcO99957Xfd49NFH8dVXX+GVV15Bnz59cOXKFVy5ciXAIyUiIqKGErXGTQlFIhE2bNiAe+65x3HMaDTimWeewbp161BRUYHU1FQsX74ct99+OwDg+PHj6Nu3L44cOYIePXoEZ+BERETkotUs/fgza9Ys7N69G+vXr8ehQ4dw3333YfTo0SgoKAAAbNq0Cd26dcPmzZuRlJSErl274o9//CNnVIiIiIKoTQQqxcXFWLNmDT788ENkZWWhe/fuePLJJ5GZmYk1a9YAAM6cOYOzZ8/iww8/xHvvvYd3330X+/btw29+85sgj56IiKjtajU5Kr4cPnwYVqsVN998s8txo9EItVoNALDZbDAajXjvvfcc161evRq33HILTp48yeUgIiKiIGgTgUp1dTUkEgn27dsHiUTici4iIgIA0KlTJ4SFhbkEMz179gRQNyPDQIWIiKj5tYlAJS0tDVarFSUlJcjKyvJ4TUZGBiwWC06fPo3u3bsDAE6dOgUA6NKlS7ONlYiIiK5pNVU/1dXVKCwsBFAXmLz22mu444470K5dOyQmJuL3v/89du7ciVdffRVpaWm4fPkytm7dir59+2Ls2LGw2WwYOHAgIiIi8Prrr8Nms2HmzJmIiorCV199FeRPR0RE1Da1mkDlu+++wx133OF2PCcnB++++y7MZjOef/55vPfee/j111+h0Whw6623YvHixejTpw8A4Pz585g9eza++uorhIeHY8yYMXj11VfRrl275v44REREhFYUqBAREVHr0ybKk4mIiKhlYqBCREREIatFV/3YbDacP38ekZGREIlEwR4OERERNYAgCKiqqkJ8fDzEYt9zJi06UDl//jwSEhKCPQwiIiK6DufOncNNN93k85oWHahERkYCqPugUVFRQR4NERERNURlZSUSEhIc3+O+tOhAxb7cExUVxUCFiIiohWlI2gaTaYmIiChkMVAhIiKikMVAhYiIiEIWAxUiIiIKWQxUiIiIKGQxUCEiIqKQxUCFiIiIQhYDFSIiIgpZDFSIiIgoZDFQISIiopDVolvohzKd3oTSahMqDWZEKaXQhMsQrZIFe1hEREQtCgOVJnC+ohZzPzqEHQWljmPDkjVYNqkv4mOUQRwZERFRy8KlnwDT6U1uQQoAbC8oxbyPDkGnNwVpZERERC0PA5UAK602uQUpdtsLSlFazUCFiIiooRioBFilwezzfJWf80RERHQNA5UAi1JIfZ6P9HOeiIiIrmGgEmCaCBmGJWs8nhuWrIEmgpU/REREDcVAJcCiVTIsm9TXLVgZlqzB8kl9WaJMRETUCCxPbgLxMUqszE5DabUJVQYzIhVSaCLYR4WIiKixGKg0kWgVAxMiIqIbxaUfIiIiClkMVIiIiChkMVAhIiKikMVAhYiIiEIWAxUiIiIKWQxUiIiIKGQxUCEiIqKQxUCFiIiIQhYDFSIiIgpZDFSIiI
goZDFQISIiopDFQIWIiIhCFgMVIiIiClkMVIiIiChkMVAhIiKikMVAhYiIiEIWAxUiIiIKWQxUiIiIKGQxUCEiIqKQxUCFiIiIQhYDFSIiIgpZDFSIiIgoZDFQISIiopDFQIWIiIhCFgMVIiIiClkMVIiIiChkMVAhIiKikBXUQMVqtWLBggVISkqCUqlE9+7dsWTJEgiCEMxhERERUYgIC+abL1++HG+99RbWrl2L3r17Y+/evZg6dSqio6MxZ86cYA6NrtLpTSitNqHSYEaUUgpNuAzRKlmwh0VERG1EUAOVXbt2YcKECRg7diwAoGvXrli3bh1++umnYA6LrjpfUYu5Hx3CjoJSx7FhyRosm9QX8THKII6MiIjaiqAu/QwdOhRbt27FqVOnAAAHDx5EXl4exowZE8xhEepmUuoHKQCwvaAU8z46BJ3eFKSRERFRWxLUGZV58+ahsrISKSkpkEgksFqteOGFFzB58mSP1xuNRhiNRsfvKysrm2uobU5ptcktSLHbXlCK0moTl4CIiKjJBXVG5T//+Q/ef/99fPDBB9i/fz/Wrl2LV155BWvXrvV4/dKlSxEdHe34lZCQ0MwjbjsqDWaf56v8nCciIgoEkRDEEpuEhATMmzcPM2fOdBx7/vnn8a9//QsnTpxwu97TjEpCQgJ0Oh2ioqKaZcxtxemSaox47Xuv57c+fhu6x0U044iIiKi1qKysRHR0dIO+v4O69KPX6yEWu07qSCQS2Gw2j9fL5XLI5fLmGFqbp4mQYViyBts9LP8MS9ZAE8FlHyIianpBXfoZP348XnjhBWzZsgU///wzNmzYgNdeew0TJ04M5rAIQLRKhmWT+mJYssbl+LBkDZZP6sv8FCIiahZBXfqpqqrCggULsGHDBpSUlCA+Ph7Z2dl49tlnIZP5/yJszNQRXR97H5UqgxmRCik0EeyjQkREN6Yx399BDVRuFAMVIiKilqcx39/c64eIiIhCFgMVIiIiClkMVIiIiChkBbU8ma4fNwskIqK2gIFKC8TNAomIqK3g0k8Lw80CiYioLWGg0sI0ZLNAIiKi1oKBSgvDzQKJiKgtYaDSwkQppD7PR/o5T0RE1JIwUGlh7JsFesLNAomIqLVhoNLCcLNAIiJqS1ie3ALFxyixMjuNmwUSEVGrx0ClhYpWMTAhIqLWj0s/REREFLIYqBAREVHIYqBCREREIYuBChEREYUsBipEREQUshioEBERUchioEJEREQhi4EKERERhSwGKkRERBSyGKgQERFRyGKgQkRERCGLgQoRERGFLAYqREREFLIYqBAREVHIYqBCREREIYuBChEREYUsBipEREQUshioEBERUchioEJEREQhi4EKERERhSwGKkRERBSyGKgQERFRyGKgQkRERCGLgQoRERGFLAYqREREFLIYqBAREVHIYqBCREREIYuBChEREYUsBipEREQUshioEBERUchioEJEREQhi4EKERERhSwGKkRERBSyGKgQERFRyAoL9gCoeej0JpRWm1BpMCNKKYUmXIZolSzYwyIiIvKJgUobcL6iFnM/OoQdBaWOY8OSNVg2qS/iY5RBHBkREZFvXPpp5XR6k1uQAgDbC0ox76ND0OlNQRoZERGRfwxUWrnSapNbkGK3vaAUpdUMVIiIKHQxUGnlKg1mn+er/JwnIiIKJgYqrVyUQurzfKSf80RERMHEQKWV00TIMCxZ4/HcsGQNNBGs/CEiotDFQKWVi1bJsGxSX7dgZViyBssn9WWJMhERhTSWJ7cB8TFKrMxOQ2m1CVUGMyIVUmgi2EeFiIhCHwOVVspTg7fucRHBHhYREVGjMFBphdjgjYiIWgvmqLQybPBGREStCQOVVoYN3oiIqDVhoNLKsMEbERG1JgxUWhk2eCMiotaEgUorwwZvRETUmjBQaWXY4I2IiFoTlie3QmzwRkRErUXQZ1R+/fVX/P73v4darYZSqUSfPn2wd+/eYA+rxYtW1TV465
8Yi+5xEQxSiIioRQrqjEp5eTkyMjJwxx134PPPP0f79u1RUFCA2NjYYA6LiIiIQkRQA5Xly5cjISEBa9ascRxLSko
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=melbourne_data['BuildingArea'], y=melbourne_data['Price'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "da3b8409",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 79.00\n",
"2 150.00\n",
"4 142.00\n",
"6 210.00\n",
"7 107.00\n",
" ... \n",
"12205 149.00\n",
"12206 115.00\n",
"12207 35.64\n",
"12209 61.60\n",
"12212 388.50\n",
"Name: BuildingArea, Length: 6196, dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"melbourne_data['BuildingArea']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c1172236",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>BuildingArea</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>79.0</td>\n",
" <td>1035000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>150.0</td>\n",
" <td>1465000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>142.0</td>\n",
" <td>1600000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>210.0</td>\n",
" <td>1876000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>107.0</td>\n",
" <td>1636000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" BuildingArea Price\n",
"1 79.0 1035000.0\n",
"2 150.0 1465000.0\n",
"4 142.0 1600000.0\n",
"6 210.0 1876000.0\n",
"7 107.0 1636000.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# wählen für unser Beispiel einen kleinen Ausschnitt aus den Daten\n",
"max_area = 400\n",
"max_datapoints = 100\n",
"data = melbourne_data[melbourne_data['BuildingArea'] < max_area][:max_datapoints][['BuildingArea', 'Price']]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8f9dec63",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f1293084",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHACAYAAACMB0PKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6K0lEQVR4nO3de3SU1b3/8c+E3BNmAomRIAEDQVBDgKqlHJCqUIUqB5FaS7U/Lp52qWCl2B6gPbW2XsDraek5xdYK1HUUe6iCS1pFCwJyqQVMuAgiQRQqIAbIhJB78vz+8GSaIZOZZDIzz56Z92utrEVmJk/2PEzm+cze3723w7IsSwAAAAZKsLsBAAAA7SGoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjxUxQ2bRpkyZOnKjevXvL4XBo9erVnT6GZVl68skndckllyglJUUXXXSRHnnkkdA3FgAAdEii3Q0IlXPnzmno0KGaOXOmbrnllqCOcd999+nNN9/Uk08+qSFDhuj06dM6ffp0iFsKAAA6yhGLmxI6HA6tWrVKN998s+e2uro6/eQnP9GKFStUUVGhoqIiPfbYY7rmmmskSfv371dxcbH27t2rQYMG2dNwAADgJWaGfgKZPXu2tm3bppdeekm7d+/WrbfeqvHjx+vgwYOSpNdee039+/fXmjVrVFBQoIsvvlj/9m//Ro8KAAA2iougcuTIES1btkwrV67U1VdfrQEDBuiHP/yhRo8erWXLlkmSPvroI33yySdauXKlnn/+eS1fvlw7d+7UN77xDZtbDwBA/IqZGhV/9uzZo6amJl1yySVet9fV1Sk7O1uS1NzcrLq6Oj3//POexz333HO64oordODAAYaDAACwQVwElaqqKnXr1k07d+5Ut27dvO7LzMyUJOXl5SkxMdErzFx66aWSvuiRIagAABB5cRFUhg8frqamJp08eVJXX321z8eMGjVKjY2NOnTokAYMGCBJ+vDDDyVJ/fr1i1hbAQDAP8XMrJ+qqiqVlZVJ+iKYPP3007r22mvVs2dP9e3bV3fccYe2bNmip556SsOHD9fnn3+udevWqbi4WDfeeKOam5t11VVXKTMzU7/85S/V3NysWbNmyel06s0337T52QEAEJ9iJqhs2LBB1157bZvbp02bpuXLl6uhoUEPP/ywnn/+eX366afKycnRV77yFf385z/XkCFDJEnHjh3TvffeqzfffFMZGRmaMGGCnnrqKfXs2TPSTwcAACiGggoAAIg9cTE9GQAARCeCCgAAMFZUz/ppbm7WsWPH1L17dzkcDrubAwAAOsCyLJ09e1a9e/dWQoL/PpOoDirHjh1Tfn6+3c0AAABBOHr0qPr06eP3MVEdVLp37y7piyfqdDptbg0AAOiIyspK5efne67j/kR1UGkZ7nE6nQQVAACiTEfKNiimBQAAxiKoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjRfUS+gAQq9zV9SqvqldlbYOcaUnKyUiWKz3Z7mYBEUdQAQDDHKuo0byXd+udg+We28YMzNGiKcXqnZVmY8uAyGPoBwAM4q6ubxNSJGnTwXLNf3m33NX1NrUMsAdBBQAMUl5V3yaktNh0sFzlVQQVxBeCCgAYpLK2we/9ZwPcD8QaggoAGMSZmuT3/u4B7gdiDUEFAAySk5msMQNzfN43ZmCOcjKZ+YP4QlABAIO40pO1aEpxm7AyZmCOHptSzBRlxB2mJwOAYXpnpenXU4ervKpeZ2sb1D01STmZrKOC+ERQAQADudIJJoDE0A8AADAYQQUAABiLoAIAAIxFUAEAAMYiqAAAAGMRVAAAgLEIKgAAwFgEFQAAYCyCCgAAMBZBBQAAGIugAgAAjEVQAQAAxiKoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAA
AwFkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxCCoAAMBYBBUAAGAsggoAADAWQQUAABiLoAIAAIxFUAEAAMYiqAAAAGMRVAAAgLEIKgAAwFgEFQAAYCyCCgAAMBZBBQAAGIugAgAAjEVQAQAAxiKoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxjAkqixYtksPh0Jw5c+xuCgAAMIQRQWX79u367W9/q+LiYrubAgAADGJ7UKmqqtLtt9+uZ599Vj169LC7OQAAwCC2B5VZs2bpxhtv1Lhx4+xuCgAAMEyinb/8pZde0nvvvaft27d36PF1dXWqq6vzfF9ZWRmupgEAAAPY1qNy9OhR3XfffXrhhReUmpraoZ9ZuHChXC6X5ys/Pz/MrQQAAHZyWJZl2fGLV69ercmTJ6tbt26e25qamuRwOJSQkKC6ujqv+yTfPSr5+flyu91yOp0RazsAAAheZWWlXC5Xh67ftg39jB07Vnv27PG6bcaMGRo8eLDmzZvXJqRIUkpKilJSUiLVRAAAYDPbgkr37t1VVFTkdVtGRoays7Pb3A4AAOKT7bN+AAAA2mPrrJ/zbdiwwe4mAAAAg9CjAgAAjEVQAQAAxiKoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxCCoAAMBYBBUAAGAsggoAADAWQQUAABiLoAIAAIxFUAEAAMYiqAAAAGMRVAAAgLEIKgAAwFgEFQAAYCyCCgAAMBZBBQAAGIugAgAAjEVQAQAAxiKoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxCCoAAMBYBBUAAGAsggoAADAWQQUAABiLoAIAAIxFUAEAAMYiqAAAAGMRVAAAgLEIKgAAwFgEFQAAYCyCCgAAMBZBBQAAGIugAgAAjEVQAQAAxiKoAAAAYxFUAACAsQgqAADAWAQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxCCoAAMBYBBUAAGAsggoAADAWQQUAABiLoAIAAIxFUAEAAMZKtLsBAMzlrq5XeVW9Kmsb5ExLUk5GslzpyXY3C0AcsbVHZcmSJSouLpbT6ZTT6dTIkSP1+uuv29kkAP/nWEWNZq8o0dinN2ryb7Zq7FMbde+KEh2rqLG7aQDiiK1BpU+fPlq0aJF27typHTt26LrrrtOkSZP0/vvv29ksIO65q+s17+Xdeudgudftmw6Wa/7Lu+WurrepZQDija1DPxMnTvT6/pFHHtGSJUv0t7/9TZdffrlNrQJQXlXfJqS02HSwXOVV9QwBAYgIY2pUmpqatHLlSp07d04jR470+Zi6ujrV1dV5vq+srIxU84C4Ulnb4Pf+swHuB4BQsX3Wz549e5SZmamUlBTdddddWrVqlS677DKfj124cKFcLpfnKz8/P8KtBeKDMzXJ7/3dA9wPAKFie1AZNGiQSktL9e677+ruu+/WtGnTtG/fPp+PXbBggdxut+fr6NGjEW4tEB9yMpM1ZmCOz/vGDMxRTibDPgAiw2FZlmV3I1obN26cBgwYoN/+9rcBH1tZWSmXyyW32y2n0xmB1gHx41hFjea/vFubWtWqjBmYo8emFCsvK83GlgGIdp25fhtTo9KiubnZqw4FgD16Z6Xp11OHq7yqXmdrG9Q9NUk5mayjAiCybA0qCxYs0IQJE9S3b1+dPXtWL774ojZs2KC1a9fa2SwA/8eVTjABYC9bg8rJkyf1//7f/9Px48flcrlUXFystWvX6mtf+5qdzQIAAIawNag899xzdv56AABgONtn/QAAALTHuGJaAPEt1jZCjLXnA0QaQQWAMY5V1LTZY2jMwBwtmlKs3lE4JTrWng9gB4Z+ABgh1j
ZCjLXnA9iFoALACB3ZCDGaxNrzAezC0A8AI8TaRoix9nxMRQ1Q7COoADBCrG2EGGvPx0TUAMUHhn4AGCHWNkKMted
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ax = sns.scatterplot(x=data['BuildingArea'], y=data['Price'])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "00dc4dee",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuildingArea 79.0\n",
"Price 1035000.0\n",
"Name: 1, dtype: float64\n",
"[[1, 79.0]]\n"
]
}
],
"source": [
"X = []\n",
"Y = []\n",
"for _, row in data.iterrows():\n",
" X.append([1] + [row['BuildingArea']])\n",
" Y.append(row['Price'])\n",
" break\n",
"X = np.array(X)\n",
"Y = np.array(Y)\n",
"print(X[:5], Y[:5])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "efecad93",
"metadata": {},
"outputs": [],
"source": [
"def h_w(x, w):\n",
" return w[0] + w[1]*x"
]
},
{
"cell_type": "markdown",
"id": "e0577f21",
"metadata": {},
"source": [
"## Analytische Lösung der linearen Regression\n",
"\n",
"`np.linalg.solve(A, b)` berechnet $w$ im linearen Gleichungssystem\n",
"\n",
"$$ A w = b $$\n",
"\n",
"$A$ - Matrix,\n",
"$w$ - Vektor (unsere Unbekannten),\n",
"$b$ - Vektor.\n",
"\n",
"Wir suchen die Lösung $w$ im folgenden Gleichungssystem:\n",
"\n",
"$$ X^T X w = X^T Y $$\n",
"\n",
"Mit $A = X^TX$ und $b = X^T Y$ berechnet `np.linalg.solve(A, b)` unsere gesuchten Parameter für die lineare Regression."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "35a78137",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 42 µs, sys: 7 µs, total: 49 µs\n",
"Wall time: 51.3 µs\n"
]
}
],
"source": [
"%%time\n",
"w_ana = np.linalg.solve(X.T @ X, X.T @ Y)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9a6041bd",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[441524.42083181 6024.22929588]\n"
]
}
],
"source": [
"print(w_ana)"
]
},
{
"cell_type": "markdown",
"id": "f51a85af",
"metadata": {},
"source": [
"Plot der analytischen Lösung"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6486ec38",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='BuildingArea', ylabel='Price'>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHACAYAAACMB0PKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABKrUlEQVR4nO3de3yT9d0//lfS8ynpIU1poUChpSmUQhVllXPLFKbeeJhzTHejuO2hgtPpdiM7ON1U8Hhv7r7nNicyf7eicxP8yaboWgpykAG2HKQtbUVaOTQ9Jj0mbXJ9/wgNTZumbZrk+iR5PR8PHg+bK7366WWa65XP4f1RSJIkgYiIiEhASrkbQERERDQcBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYTGoEBERkbAYVIiIiEhYDCpEREQkLAYVIiIiElbABJW9e/fixhtvRFpaGhQKBXbs2DHmc0iShOeffx4zZsxAREQEJk6ciKeeesrzjSUiIqJRCZW7AZ7S2dmJOXPmYO3atbjlllvcOseDDz6Ijz76CM8//zxmz56NlpYWtLS0eLilRERENFqKQNyUUKFQYPv27bjpppvsj5lMJvzsZz/Dtm3b0NbWhtzcXDzzzDNYunQpAKCiogJ5eXk4efIksrOz5Wk4EREROQiYoZ+RrF+/HgcPHsRbb72F48eP47bbbsOKFStQXV0NAHj//fcxbdo07Ny5ExkZGZg6dSq+973vsUeFiIhIRkERVOrq6vDaa6/hnXfewaJFizB9+nT8+Mc/xsKFC/Haa68BAL744gucPXsW77zzDl5//XVs3boVR48exTe/+U2ZW09ERBS8AmaOiisnTpyAxWLBjBkzHB43mUxISkoCAFitVphMJrz++uv257366qu48sorUVVVxeEgIiIiGQRFUOno6EBISAiOHj2KkJAQh2OxsbEAgNTUVISGhjqEmZycHAC2HhkGFSIiIt8LiqCSn58Pi8UCvV6PRYsWOX3OggUL0NfXh9raWkyfPh0AcPr0aQDAlClTfNZWIiIiuixgVv10dHSgpqYGgC2YvPjii1i2bBkSExMxefJk3Hnnndi/fz9eeOEF5Ofno7GxEcXFxcjLy8P1118Pq9WKq666CrGxsfjNb34Dq9WKdevWQaVS4aOPPpL5tyMiIgpOARNUSktLsWzZsiGPr1mzBlu3bkVvby+efPJJvP766zh37hw0Gg2+9rWv4YknnsDs2bMBAOfPn8cDDzyAjz76CDExMVi5ciVeeOEFJCYm+vrXISIiIgRQUCEiIqLAExTLk4mIiMg/MagQERGRsPx61Y/VasX58+cRFxcHhUIhd3OIiIhoFCRJQnt7O9LS0qBUuu4z8eugcv78eaSnp8vdDCIiInJDfX09Jk2a5PI5fh1U4uLiANh+UZVKJXNriIiIaDSMRiPS09Pt93FX/Dqo9A/3qFQqBhUiIiI/M5ppG5xMS0RERMJiUCEiIiJhMagQERGRsBhUiIiISFgMKkRERCQsBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYfl1CX0iokBl6DKjqcMMY08vVFFh0MSEQx0dLneziHyOQYWISDDn27qx4e/H8Ul1k/2xxVkabL41D2nxUTK2jMj3OPRDRCQQQ5d5SEgBgL3VTXj078dh6DLL1DIieTCoEBEJpKnDPCSk9Ntb3YSmDgYVCi4MKkREAjH29Lo83j7CcaJAw6BCRCQQVWSYy+NxIxwnCjQMKkREAtHEhmNxlsbpscVZGmhiufKHgguDChGRQNTR4dh8a96QsLI4S4Nnbs3jEmUKOlyeTEQkmLT4KPxudT6aOsxo7+lFXGQYNLGso0LBiUGFiEhA6mgGEyKAQz9EREQkMAYVIiIiEhaDChEREQmLQYWIiIiExaBCREREwmJQISIiImExqBAREZGwGFSIiIhIWAwqREREJCwGFSIiIhIWgwoREREJi0
GFiIiIhMWgQkRERMJiUCEiIiJhMagQERGRsBhUiIiISFgMKkRERCQsBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYTGoEBERkbAYVIiIiEhYDCpEREQkLAYVIiIiEhaDChEREQmLQYWIiIiExaBCREREwmJQISIiImExqBAREZGwGFSIiIhIWAwqREREJCwGFSIiIhIWgwoREREJi0GFiIiIhMWgQkRERMJiUCEiIiJhMagQERGRsBhUiIiISFgMKkRERCQsBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYQkTVDZv3gyFQoGHHnpI7qYQERGRIIQIKocPH8Yf//hH5OXlyd0UIiIiEojsQaWjowN33HEHXnnlFSQkJMjdHCIiIhKI7EFl3bp1uP7667F8+XK5m0JERESCCZXzh7/11lv47LPPcPjw4VE932QywWQy2b82Go3eahoREREJQLYelfr6ejz44IN44403EBkZOarv2bRpE9Rqtf1fenq6l1tJREREclJIkiTJ8YN37NiBm2++GSEhIfbHLBYLFAoFlEolTCaTwzHAeY9Keno6DAYDVCqVz9pORERE7jMajVCr1aO6f8s29FNUVIQTJ044PHb33XdDp9Nhw4YNQ0IKAERERCAiIsJXTSQiIiKZyRZU4uLikJub6/BYTEwMkpKShjxOREREwUn2VT9EREREw5F11c9gpaWlcjeBiIiIBMIeFSIiIhIWgwoREREJi0GFiIiIhMWgQkRERMJiUCEiIiJhMagQERGRsBhUiIiISFgMKkRERCQsBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYTGoEBERkbAYVIiIiEhYDCpEREQkLAYVIiIiEhaDChEREQmLQYWIiIiExaBCREREwmJQISIiImExqBAREZGwGFSIiIhIWAwqREREJCwGFSIiIhIWgwoREREJi0GFiIiIhMWgQkRERMJiUCEiIiJhMagQERGRsBhUiIiISFgMKkRERCQsBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYTGoEBERkbAYVIiIiEhYDCpEREQkLAYVIiIiEhaDChEREQmLQYWIiIiExaBCREREwmJQISIiImExqBAREZGwGFSIiIhIWAwqREREJCwGFSIiIhIWgwoREREJi0GFiIiIhMWgQkRERMJiUCEiIiJhMagQERGRsBhUiIiISFgMKkRERCQsBhUiIiISFoMKERERCYtBhYiIiITFoEJERETCYlAhIiIiYTGoEBERkbAYVIiIiEhYDCpEREQkLAYVIiIiEhaDChEREQkrVO4GEJG4DF1mNHWYYezphSoqDJqYcKijw+VuFhEFEVl7VF5++WXk5eVBpVJBpVKhoKAAH3zwgZxNIqJLzrd1Y/22MhS9uAc3//4Ail7Ygwe2leF8W7fcTSOiICJrUJk0aRI2b96Mo0eP4siRIygsLMSqVavw+eefy9ksoqBn6DJjw9+P45PqJofH91Y34dG/H4ehyyxTy4go2Mg69HPjjTc6fP3UU0/h5ZdfxqeffopZs2bJ1CoiauowDwkp/fZWN6Gpw8whICLyCWHmqFgsFrzzzjvo7OxEQUGB0+eYTCaYTCb710aj0VfNIwoqxp5el8fbRzhOROQpsq/6OXHiBGJjYxEREYF7770X27dvx8yZM50+d9OmTVCr1fZ/6enpPm4tUXBQRYa5PB43wnEiIk+RPahkZ2ejvLwchw4dwn333Yc1a9bg1KlTTp+7ceNGGAwG+7/6+noft5YoOGhiw7E4S+P02OIsDTSxHPYhIt9QSJIkyd2IgZYvX47p06fjj3/844jPNRqNUKvVMBgMUKlUPmgdUfA439aNR/9+HHsHzFVZnKXBM7fmITU+SsaWEZG/G8v9W5g5Kv2sVqvDPBQikkdafBR+tzofTR1mtPf0Ii4yDJpY1lEhIt+SNahs3LgRK1euxO
TJk9He3o4333wTpaWl2LVrl5zNIqJL1NEMJkQkL1mDil6vx3/+53/iwoULUKvVyMvLw65du/D1r39dzmYRERGRIGQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ax = sns.scatterplot(x=data['BuildingArea'], y=data['Price'])\n",
"\n",
"xplot = [min(data['BuildingArea']), max(data['BuildingArea'])]\n",
"yplot = [h_w(x, w_ana) for x in xplot]\n",
"sns.lineplot(x=xplot, y=yplot, ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "aab92a40",
"metadata": {},
"outputs": [],
"source": [
"# Definition der Kostenfunktion\n",
"def J(w, x, y):\n",
" \"\"\"w, x, y müssen numpy arrays sein\"\"\"\n",
" errors = y - h_w(x=x, w=w)\n",
" mse = 1.0/(2.0*len(errors)) * ( errors @ errors )\n",
" return mse"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "7ef64eb2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Kosten der analytischen Lösung: 200141433273.1325\n"
]
}
],
"source": [
"x = data['BuildingArea'].to_numpy(copy=True)\n",
"y = data['Price'].to_numpy(copy=True)\n",
"J_ana = J(w=w_ana, x=x, y=y)\n",
"print('Kosten der analytischen Lösung: {}'.format(J_ana))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "0272e5ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([441524.42083181, 6024.22929588])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"w_ana"
]
},
{
"cell_type": "markdown",
"id": "217f80c5",
"metadata": {},
"source": [
"## Numerische Lösung mit Gradient Descent"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "6cda3066",
"metadata": {},
"outputs": [],
"source": [
"def grad_desc_upd(w, alpha, x, y):\n",
" \"\"\"y, x sind Vektoren (numpy-arrays)\"\"\"\n",
" errors = y - h_w(x=x, w=w)\n",
" w_0_upd = w[0] + alpha / len(x) * sum(errors)\n",
" \n",
" errors_x_x1 = errors @ x\n",
" w_1_upd = w[1] + alpha / len(x) * errors_x_x1\n",
" return [w_0_upd, w_1_upd]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b349e5ab",
"metadata": {},
"outputs": [],
"source": [
"def grad_desc(w, alpha, x, y, n_iterations):\n",
" J_all = [J(w=w, x=x, y=y)]\n",
" for it in range(n_iterations):\n",
" w = grad_desc_upd(w=w, alpha=alpha, x=x, y=y)\n",
" J_all.append(J(w=w, x=x, y=y))\n",
" return w, J_all"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b87084ca",
"metadata": {},
"outputs": [],
"source": [
"w_tmp, J_tmp = grad_desc(w=[1e5, 1000.], alpha=1e-9, x=data['BuildingArea'].to_numpy(), y=data['Price'].to_numpy(), n_iterations=1)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "a129a532",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9999715711803561"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"J_tmp[1]/J_tmp[0]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "fd1bb601",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"w_gd_1e4: [915045.6766397628, 2959.2952860626924]\n",
"Vergleich zu Startkosten: 0.8899738097177349\n",
"Vergleich zu analytischer Lösung: 1.0924465228987312\n",
"(w0_gd - w0_ana)/w0_ana: 1.0724690039021088\n",
"(w1_gd - w1_ana)/w1_ana: -0.5087678206259784\n",
"CPU times: user 256 ms, sys: 8.18 ms, total: 264 ms\n",
"Wall time: 90.6 ms\n"
]
}
],
"source": [
"%%time\n",
"n_iterations = 10000\n",
"alpha = 0.0001 # mit alpha experimentieren\n",
"w_init = [1e6, 1000.]\n",
"x = data['BuildingArea'].to_numpy()\n",
"y = data['Price'].to_numpy()\n",
"w_gd_1e4, J_all_1e4 = grad_desc(w=w_init, alpha=alpha, x=x, y=y, n_iterations=n_iterations)\n",
"\n",
"print('w_gd_1e4: {}'.format(w_gd_1e4))\n",
"print('Vergleich zu Startkosten: {}'.format(J_all_1e4[-1]/J_all_1e4[0]))\n",
"print('Vergleich zu analytischer Lösung: {}'.format(J_all_1e4[-1]/J_ana))\n",
"print('(w0_gd - w0_ana)/w0_ana: {}'.format((w_gd_1e4[0]-w_ana[0])/w_ana[0]))\n",
"print('(w1_gd - w1_ana)/w1_ana: {}'.format((w_gd_1e4[1]-w_ana[1])/w_ana[1]))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1c26fde8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"w_gd_1e5: [548748.7304152894, 5330.2046262550075]\n",
"Vergleich zu Startkosten: 0.8185228834109243\n",
"Vergleich zu analytischer Lösung: 1.004740216095697\n",
"(w0_gd - w0_ana)/w0_ana: 0.24285023551238794\n",
"(w1_gd - w1_ana)/w1_ana: -0.1152055533638727\n",
"CPU times: user 958 ms, sys: 20.8 ms, total: 978 ms\n",
"Wall time: 825 ms\n"
]
}
],
"source": [
"%%time\n",
"n_iterations = 100000\n",
"alpha = 0.0001 # mit alpha experimentieren\n",
"w_init = [1e6, 1000.]\n",
"x = data['BuildingArea'].to_numpy()\n",
"y = data['Price'].to_numpy()\n",
"w_gd_1e5, J_all_1e5 = grad_desc(w=w_init, alpha=alpha, x=x, y=y, n_iterations=n_iterations)\n",
"\n",
"print('w_gd_1e5: {}'.format(w_gd_1e5))\n",
"print('Vergleich zu Startkosten: {}'.format(J_all_1e5[-1]/J_all_1e5[0]))\n",
"print('Vergleich zu analytischer Lösung: {}'.format(J_all_1e5[-1]/J_ana))\n",
"print('(w0_gd - w0_ana)/w0_ana: {}'.format((w_gd_1e5[0]-w_ana[0])/w_ana[0]))\n",
"print('(w1_gd - w1_ana)/w1_ana: {}'.format((w_gd_1e5[1]-w_ana[1])/w_ana[1]))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "ebff7a0b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"w_gd_3e5: [445476.78736763657, 5998.647038552499]\n",
"Vergleich zu Startkosten: 0.8146664601191632\n",
"Vergleich zu analytischer Lösung: 1.000006440595892\n",
"(w0_gd - w0_ana)/w0_ana: 0.008951637439164814\n",
"(w1_gd - w1_ana)/w1_ana: -0.0042465610235696925\n",
"CPU times: user 2.4 s, sys: 53.3 ms, total: 2.45 s\n",
"Wall time: 2.46 s\n"
]
}
],
"source": [
"%%time\n",
"n_iterations = 300000\n",
"alpha = 0.0001 # mit alpha experimentieren\n",
"w_init = [1e6, 1000.]\n",
"x = data['BuildingArea'].to_numpy()\n",
"y = data['Price'].to_numpy()\n",
"w_gd_3e5, J_all_3e5 = grad_desc(w=w_init, alpha=alpha, x=x, y=y, n_iterations=n_iterations)\n",
"\n",
"print('w_gd_3e5: {}'.format(w_gd_3e5))\n",
"print('Vergleich zu Startkosten: {}'.format(J_all_3e5[-1]/J_all_3e5[0]))\n",
"print('Vergleich zu analytischer Lösung: {}'.format(J_all_3e5[-1]/J_ana))\n",
"print('(w0_gd - w0_ana)/w0_ana: {}'.format((w_gd_3e5[0]-w_ana[0])/w_ana[0]))\n",
"print('(w1_gd - w1_ana)/w1_ana: {}'.format((w_gd_3e5[1]-w_ana[1])/w_ana[1]))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "a1b5db98",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"w_gd_1e6: [441524.45883596264, 6024.229049890118]\n",
"Vergleich zu Startkosten: 0.8146612132155007\n",
"Vergleich zu analytischer Lösung: 1.0000000000000007\n",
"(w0_gd - w0_ana)/w0_ana: 8.60748531445252e-08\n",
"(w1_gd - w1_ana)/w1_ana: -4.0832990952070806e-08\n",
"CPU times: user 7.99 s, sys: 155 ms, total: 8.14 s\n",
"Wall time: 8.22 s\n"
]
}
],
"source": [
"%%time\n",
"n_iterations = 1000000\n",
"alpha = 0.0001 # mit alpha experimentieren\n",
"w_init = [1e6, 1000.]\n",
"x = data['BuildingArea'].to_numpy()\n",
"y = data['Price'].to_numpy()\n",
"w_gd_1e6, J_all_1e6 = grad_desc(w=w_init, alpha=alpha, x=x, y=y, n_iterations=n_iterations)\n",
"\n",
"print('w_gd_1e6: {}'.format(w_gd_1e6))\n",
"print('Vergleich zu Startkosten: {}'.format(J_all_1e6[-1]/J_all_1e6[0]))\n",
"print('Vergleich zu analytischer Lösung: {}'.format(J_all_1e6[-1]/J_ana))\n",
"print('(w0_gd - w0_ana)/w0_ana: {}'.format((w_gd_1e6[0]-w_ana[0])/w_ana[0]))\n",
"print('(w1_gd - w1_ana)/w1_ana: {}'.format((w_gd_1e6[1]-w_ana[1])/w_ana[1]))"
]
},
{
"cell_type": "markdown",
"id": "f35b62d4",
"metadata": {},
"source": [
"### Kosten J als Funktion von Gradient Descent Schritten"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "b18c5272",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAG+CAYAAABBOgSxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAri0lEQVR4nO3de3xU9Z3/8ffkNglhkhBiICFBLopYuSpUAUHoj+KtbNlW61YL2NZ19+HESnm4qxRdSsUG/KEP++haulKFUotx8QfYpSyVQgMFod2NpIBgFAOEW7iImdxgMpk5vz9y0UACmUlmvpmc1/PxmAeZM+fMfObbPMy73/M53+OwLMsSAACAITGmCwAAAPZGGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGRVUY2b59u6ZPn67s7Gw5HA6tX78+qOMvXryohx9+WMOHD1dcXJxmzJhx2T6nTp3Sgw8+qCFDhigmJkZz5szplNoBAEDroiqM1NTUaOTIkXrllVdCOt7v9yspKUk/+MEPNHXq1Fb38Xq9uuaaa/TMM89o5MiRHSkXAAC0Q5zpAoJx99136+67727zda/Xq/nz5+vNN99URUWFhg0bpiVLlmjy5MmSpOTkZC1btkyStHPnTlVUVFz2HgMGDNDPfvYzSdLrr7/e6d8BAAC0FFUzI1eTl5enXbt2qaCgQHv37tX999+vu+66Sx9//LHp0gAAQBu6TRgpKyvTihUrtGbNGk2cOFGDBw/Wk08+qdtvv10rVqwwXR4AAGhDVJ2muZJ9+/bJ7/dryJAhLbZ7vV717t3bUFUAAOBquk0Yqa6uVmxsrIqKihQbG9vitZ49exqqCgAAXE23CSOjR4+W3+/XmTNnNHHiRNPlAACAdoqqMFJdXa1Dhw41Pz98+LCKi4uVnp6uIUOG6KGHHtKsWbP04osvavTo0Tp79qy2bNmiESNG6N5775UkHThwQHV1dTp//ryqqqpUXFwsSRo1alTz+zZtq66u1tmzZ1VcXKyEhAR96UtfitRXBQDANhyWZVmmi2ivwsJCTZky5bLts2fP1sqVK+Xz+bRo0SKtWrVKJ06cUEZGhm677TYtXLhQw4cPl9Rw6e7Ro0cve48vDoPD4bjs9WuvvVZHjhzpvC8DAAAkRVkYAQAA3U+3ubQXAABEJ8IIAAAwKioaWAOBgE6ePCmXy9VqPwcAAOh6LMtSVVWVsrOzFRPT9vxHVISRkydPKjc313QZAAAgBMeOHVNOTk6br0dFGHG5XJIavkxKSorhagAAQHtUVlYqNze3+e94W6IijDSdmklJSSGMAAAQZa7WYkEDKwAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwChbh5Ef/+4DfedXf1HR0fOmSwEAwLZsHUb2Hq/QjkPn9Gl1nelSAACwLVuHEQAAYB5hBAAAGEUYkWSZLgAAABuzdRhxOBymSwAAwPZsHUYAAIB5hBEAAGAUYUSSRdMIAADG2DqM0DECAIB5tg4jAADAPMIIAAAwijAiiZVGAAAwx9ZhhGVGAAAwz9ZhBAAAmEcYAQAARhFGxDojAACYZOsw4mClEQAAjLN1GAEAAOYRRgAAgFGEEbHKCAAAJtk7jNAyAgCAcfYOIwAAwDjCCAAAMIowItYZAQDAJFuHEVpGAAAwz9ZhBAAAmEcYAQAARhFGJFmsNAIAgDG2DiMOmkYAADDO1mEEAACYRxgBAABGEUbEOiMAAJhk6zDiYKURAACMs3UYAQAA5hFGAACAUYQRiVVGAAAwyNZhhHVGAAAwz9ZhBAAAmBdUGMnPz9fYsWPlcrmUmZmpGTNmqKSkpN3HFxQUyOFwaMaMGcHWCQAAuqmgwsi2bdvkdru1e/dubd68WT6fT9OmTVNNTc1Vjz1y5IiefP
JJTZw4MeRiw8VioREAAIyJC2bnTZs2tXi+cuVKZWZmqqioSJMmTWrzOL/fr4ceekgLFy7Un//8Z1VUVIRUbGejZwQAAPM61DPi8XgkSenp6Vfc7yc/+YkyMzP1/e9/v13v6/V6VVlZ2eIBAAC6p5DDSCAQ0Jw5czRhwgQNGzaszf127Nih1157TcuXL2/3e+fn5ys1NbX5kZubG2qZAACgiws5jLjdbu3fv18FBQVt7lNVVaWZM2dq+fLlysjIaPd7z5s3Tx6Pp/lx7NixUMsEAABdXFA9I03y8vK0YcMGbd++XTk5OW3u98knn+jIkSOaPn1687ZAINDwwXFxKikp0eDBgy87zul0yul0hlJaULg3DQAA5gUVRizL0uOPP65169apsLBQAwcOvOL+Q4cO1b59+1pse+aZZ1RVVaWf/exnnH4BAADBhRG3263Vq1frnXfekcvlUnl5uSQpNTVVSUlJkqRZs2apX79+ys/PV2Ji4mX9JGlpaZJ0xT4TAABgH0GFkWXLlkmSJk+e3GL7ihUr9PDDD0uSysrKFBMTXQu7sswIAADmBH2a5moKCwuv+PrKlSuD+ciwYp0RAADMi64pDAAA0O0QRgAAgFGEEUmWaBoBAMAUwggAADCKMAIAAIwijAAAAKMII2KdEQAATLJ1GHGw0AgAAMbZOowAAADzCCPiNA0AACYRRgAAgFG2DiN0jAAAYJ6twwgAADCPMCKxGDwAAAbZOoxwZS8AAObZOowAAADzCCMAAMAowogki4VGAAAwxtZhhJYRAADMs3UYAQAA5hFGAACAUYQRsc4IAAAm2TqMOFhoBAAA42wdRgAAgHmEEQAAYBRhRKJpBAAAg2wdRugYAQDAPFuHEQAAYB5hBAAAGEUYkWTRNAIAgDG2DiMsMwIAgHm2DiMAAMA8wggAADCKMCLJomUEAABjbB5GaBoBAMA0m4cRAABgGmEEAAAYRRgRt6YBAMAkW4cR1hkBAMA8W4cRAABgHmEEAAAYRRgR64wAAGCSrcMILSMAAJhn6zACAADMI4wAAACjCCOSLFYaAQDAGFuHEdYZAQDAPFuHEQAAYB5hBAAAGEUYEeuMAABgkq3DiIOVRgAAMM7WYQQAAJhHGAEAAEYRRiRWGQEAwCBbhxHWGQEAwDxbhxEAAGAeYQQAABhFGJFYaAQAAINsHUboGQEAwDxbhxEAAGAeYQQAABhFGBHrjAAAYJKtwwj3pgEAwLygwkh+fr7Gjh0rl8ulzMxMzZgxQyUlJVc8Zu3atRozZozS0tKUnJysUaNG6Te/+U2HigYAAN1HUGFk27Ztcrvd2r17tzZv3iyfz6dp06appqamzWPS09M1f/587dq1S3v37tV3v/tdffe739Uf/vCHDhcPAACiX1wwO2/atKnF85UrVyozM1NFRUWaNGlSq8dMnjy5xfMnnnhCv/71r7Vjxw7deeedwVUbJiwzAgCAOR3qGfF4PJIaZj/aw7IsbdmyRSUlJW2GF0nyer2qrKxs8QgLWkYAADAuqJmRLwoEApozZ44mTJigYcOGXXFfj8ejfv36yev1KjY2Vr/4xS/01a9+tc398/PztXDhwlBLAwAAUSTkMOJ2u7V//37t2LHjqvu6XC4VFxerurpaW7Zs0dy5czVo0KDLTuE0mTdvnubOndv8vLKyUrm5uaGWCgAAurCQwkheXp42bNig7du3Kycn56r7x8TE6LrrrpMkjRo1SgcPHlR+fn6bYcTpdMrpdIZSWkgsmkYAADAmqDBiWZYef/xxrVu3ToWFhRo4cGBIHxoIBOT1ekM6tjPRMgIAgHlBhRG3263Vq1frnXfekcvlUnl5uSQpNTVVSUlJkqRZs2apX79+ys/Pl9TQ/zFmzBgNHjxYXq9XGzdu1G9+8xstW7ask78KAACIRkGFkaYAcenplRUrVujhhx+WJJWVlSkm5vOLdGpqavTYY4/p+PHjSkpK0tChQ/XGG2/ogQce6FjlAACgWwj6NM
3VFBYWtni+aNEiLVq0KKiiIo2OEQAAzLH3vWkcdI0AAGCarcMIAAAwjzACAACMIoyIe9MAAGCSrcMIHSMAAJhn6zA
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.lineplot(x=list(range(len(J_all_1e6))), y=J_all_1e6)"
]
},
{
"cell_type": "markdown",
"id": "5ec059f6",
"metadata": {},
"source": [
"### Plotten der Ergebnisse und Vergleich zwischen analytischer und numerischer Lösung\n",
    "Nach $10^4$ Schritten des Gradient-Descent-Algorithmus weicht der lineare Fit (rote Linie) noch sichtbar von der analytischen Lösung ab. Nach $10^5$ Schritten (graue Linie) ist der Unterschied im Plot kaum zu erkennen.\n",
"Die numerische Lösung war in diesem Beispiel deutlich langsamer als die analytische. Allerdings haben wir für die analytische Lösung auch eine effiziente numpy-Implementierung genutzt und für die numerische unoptimierten Python-Code."
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "21c941e4",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHACAYAAACMB0PKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhfklEQVR4nO3daXhb1b02/FuzJduSB1m25FGeMzmEBGiaECBJISmlTKUtpT0MPe0DJJxS2h6g7elIS2ihp4f2lE4M5XkgbSlDX2iZGkgCIaVJyDw4HuR5niTLtsa93w/CimVLnm1tSffvunIVa0vbS7u29u21/mstmSiKIoiIiIgkSB7tBhARERFFwqBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSFTdBZe/evbjqqqtgsVggk8nw0ksvzfgcoiji4YcfRnl5OTQaDXJzc/GjH/1o/htLRERE06KMdgPmy9DQEFauXInbbrsN11133azO8ZWvfAVvvPEGHn74YaxYsQJ9fX3o6+ub55YSERHRdMnicVNCmUyGF198Eddcc03wMbfbjW9961vYuXMnBgYGsHz5cjz00EO49NJLAQCnT59GVVUVTpw4gYqKiug0nIiIiELEzdDPVLZv3479+/fjj3/8I44dO4YbbrgBW7ZsQU1NDQDg5ZdfRnFxMV555RVYrVYUFRXh3//939mjQkREFEUJEVSamprw5JNP4rnnnsPFF1+MkpISfP3rX8f69evx5JNPAgDq6+vR2NiI5557Dk8//TSeeuopHDp0CJ/61Kei3HoiIqLEFTc1KpM5fvw4/H4/ysvLQx53u93IzMwEAAiCALfbjaeffjr4vMcffxyrV69GdXU1h4OIiIiiICGCitPphEKhwKFDh6BQKEKOpaSkAADMZjOUSmVImFmyZAmAQI8MgwoREdHiS4igsmrVKvj9fnR1deHiiy8O+5x169bB5/Ohrq4OJSUlAICzZ88CAAoLCxetrURERHRO3Mz6cTqdqK2tBRAIJj/72c9w2WWXISMjAwUFBfj85z+Pffv24ZFHHsGqVavQ3d2NXbt2oaqqCldeeSUEQcAFF1yAlJQU/PznP4cgCNi2bRv0ej3eeOONKL87IiKixBQ3QWX37t247LLLJjx+880346mnnoLX68UDDzyAp59+Gq2trTAajfjIRz6C73//+1ixYgUAoK2tDXfddRfeeOMNJCcnY+vWrXjkkUeQkZGx2G+HiIiIEEdBhYiIiOJPQkxPJiIiotjEoEJERESSFdOzfgRBQFtbG1JTUyGTyaLdHCIiIpoGURQxODgIi8UCuXzyPpOYDiptbW3Iz8+PdjOIiIhoFpqbm5GXlzfpc2I6qKSmpgIIvFG9Xh/l1hAREdF0OBwO5OfnB+/jk4npoDI63KPX6xlUiIiIYsx0yjZYTEtERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSFdNL6BMRxSv7sAc9Tg8cLi/0WhWMyWoYdOpoN4to0TGoEBFJTNvACO59/hjeqekJPrahzIgd11fBkqaNYsuIFh+HfoiIJMQ+7JkQUgBgb00P7nv+GOzDnii1jCg6GFSIiCSkx+mZEFJG7a3pQY+TQYUSC4MKEZGEOFzeSY8PTnGcKN4wqBARSYg+STXp8dQpjhPFGwYVIiIJMaaosaHMGPbYhjIjjCmc+UOJhUGFiEhCDDo1dlxfNSGsbCgz4qHrqzhFmRIOpycTEUmMJU2LX9y4Cj1ODwZdXqQmqWBM4ToqlJgYVIiIJMigYzAhAjj0Q0RERBLGoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCRE
REksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJLFoEJERESSxaBCREREksWgQkRERJIlmaCyY8cOyGQy3H333dFuChEREUmEJILKgQMH8Jvf/AZVVVXRbgoRERFJSNSDitPpxE033YTf/e53SE9Pj3ZziIiISEKiHlS2bduGK6+8Eps3b452U4iIiEhilNH85n/84x/xwQcf4MCBA9N6vtvthtvtDn7tcDgWqmlEREQkAVHrUWlubsZXvvIVPPPMM0hKSprWax588EEYDIbgv/z8/AVuJREREUWTTBRFMRrf+KWXXsK1114LhUIRfMzv90Mmk0Eul8PtdoccA8L3qOTn58Nut0Ov1y9a24mIiGj2HA4HDAbDtO7fURv62bRpE44fPx7y2K233orKykrce++9E0IKAGg0Gmg0msVqIhEREUVZ1IJKamoqli9fHvJYcnIyMjMzJzxOREREiSnqs36IiIiIIonqrJ/xdu/eHe0mEBERkYSwR4WIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCSLQYWIiIgki0GFiIiIJItBhYiIiCRLGe0GEJF02Yc96HF64HB5odeqYExWw6BTR7tZRJRAotqj8thjj6Gqqgp6vR56vR5r167Fq6++Gs0mEdGH2gZGsH3nYWz62R5c+6v3sOmRPbhr52G0DYxEu2lElECiGlTy8vKwY8cOHDp0CAcPHsTGjRtx9dVX4+TJk9FsFlHCsw97cO/zx/BOTU/I43trenDf88dgH/ZEqWVElGiiOvRz1VVXhXz9ox/9CI899hj++c9/YtmyZVFqFRH1OD0TQsqovTU96HF6OARERItCMjUqfr8fzz33HIaGhrB27dqwz3G73XC73cGvHQ7HYjWPKKE4XN5Jjw9OcZyIaL5EfdbP8ePHkZKSAo1Gg9tvvx0vvvgili5dGva5Dz74IAwGQ/Bffn7+IreWKDHok1STHk+d4jgR0XyJelCpqKjAkSNH8P777+OOO+7AzTffjFOnToV97v333w+73R7819zcvMitJUoMxhQ1NpQZwx7bUGaEMYXDPkS0OGSiKIrRbsRYmzdvRklJCX7zm99M+VyHwwGDwQC73Q69Xr8IrSNKHG0DI7jv+WPYO6ZWZUOZEQ9dXwVzmjaKLSOiWDeT+7dkalRGCYIQUodCRNFhSdPiFzeuQo/Tg0GXF6lJKh
hTuI4KES2uqAaV+++/H1u3bkVBQQEGBwfx7LPPYvfu3Xj99dej2Swi+pBBx2BCRNEV1aDS1dWFf/u3f0N7ezsMBgO
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot\n",
"xplot = [min(data['BuildingArea']), max(data['BuildingArea'])]\n",
"yplot_ana = [h_w(x, w_ana) for x in xplot]\n",
"yplot_gd_1e4 = [h_w(x, w_gd_1e4) for x in xplot]\n",
"yplot_gd_1e5 = [h_w(x, w_gd_1e5) for x in xplot]\n",
"# yplot_gd_3e5 = [h_w(x, w_gd_3e5) for x in xplot]\n",
"yplot_gd_1e6 = [h_w(x, w_gd_1e6) for x in xplot]\n",
"ax = sns.scatterplot(x=data['BuildingArea'], y=data['Price'])\n",
"ax = sns.lineplot(x=xplot, y=yplot_ana, ax=ax)\n",
"ax = sns.lineplot(x=xplot, y=yplot_gd_1e4, color='red', ax=ax)\n",
"ax = sns.lineplot(x=xplot, y=yplot_gd_1e5, color='grey', ax=ax)\n",
"# ax = sns.lineplot(x=xplot, y=yplot_gd_3e5, color='green', linestyle='dotted', ax=ax)\n",
"ax = sns.lineplot(x=xplot, y=yplot_gd_1e6, color='pink', linestyle='--', ax=ax)"
]
},
{
"cell_type": "markdown",
"id": "60bc96a1",
"metadata": {},
"source": [
"## Vorhersagen unseres Modells\n",
"\n",
    "Man kann die Vorhersagen des Modells im Plot oben an der Geraden ablesen: Zu jedem Wert von `BuildingArea` (x-Achse des Plots) kann so der zugehörige Preis (`Price`) auf der y-Achse abgelesen werden.\n",
"\n",
    "Alternativ können wir die von uns oben definierte Funktion `h_w(x, w)` aufrufen. Der Parameter `w` ist die gefundene Lösung und `x` die `BuildingArea`, für die wir einen Preis vorhersagen wollen."
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "e13003c8",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Preis laut analytischem Modell: 2170478.23\n",
"Preis laut Gradient Descent Modell nach 10^5 Iterationen: 2078517.46\n",
"Preis laut Gradient Descent Modell nach 3*10^5 Iterationen: 2167088.49\n",
"Preis laut Gradient Descent Modell nach 1*10^6 Iterationen: 2170478.20\n"
]
}
],
"source": [
"# Beispiel: Vorhersage unseres Modells für ein Haus mit Wohnfläche 287:\n",
"# wir machen je eine Vorhersage mit\n",
"# 1. den analytisch gefundenen Paramtern\n",
"# 2. den mit Gradient Descent nach 10^5 Iterationen gefundenen Parametern\n",
"# 3. den mit Gradient Descent nach 3*10^5 Iterationen gefundenen Parametern\n",
"building_area_new = 287\n",
"price_ana = h_w(x=building_area_new, w=w_ana)\n",
"price_1e5 = h_w(x=building_area_new, w=w_gd_1e5)\n",
"price_3e5 = h_w(x=building_area_new, w=w_gd_3e5)\n",
"price_1e6 = h_w(x=building_area_new, w=w_gd_1e6)\n",
"print('Preis laut analytischem Modell: {:.2f}'.format(price_ana))\n",
"print('Preis laut Gradient Descent Modell nach 10^5 Iterationen: {:.2f}'.format(price_1e5))\n",
"print('Preis laut Gradient Descent Modell nach 3*10^5 Iterationen: {:.2f}'.format(price_3e5))\n",
"print('Preis laut Gradient Descent Modell nach 1*10^6 Iterationen: {:.2f}'.format(price_1e6))"
]
},
{
"cell_type": "markdown",
"id": "fca62677",
"metadata": {},
"source": [
"## $R^2$"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "f1703c7f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"erklärte Varianz (R^2): 0.22971025499088604\n"
]
}
],
"source": [
"x = data['BuildingArea'].to_numpy(copy=True)\n",
"y = data['Price'].to_numpy(copy=True)\n",
"J_ana = J(w=w_ana, x=x, y=y)\n",
"MSE = 2*J_ana\n",
"mu_y = sum(y)/len(y)\n",
"sigma_y_quadrat = ( (y - mu_y) @ (y - mu_y) ) / len(y)\n",
"R2 = 1 - MSE/sigma_y_quadrat\n",
"print('erklärte Varianz (R^2): {}'.format(R2))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}