2025-01-23 10:44:22 +01:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"id": "dff037b7-7d71-49c2-8a47-48017c073f81",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"$R^2$ für ein polynomielles Modell"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 74,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "8b1ff6ff-f80e-4cc3-b266-0ad417911d1d",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"# plotting settings\n",
|
|
|
|
"pd.plotting.register_matplotlib_converters()\n",
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
"%matplotlib inline\n",
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
"from tqdm.notebook import tqdm"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 75,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "f698283d-7346-4618-9b87-60a3de061a98",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',\n",
|
|
|
|
" 'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',\n",
|
|
|
|
" 'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',\n",
|
|
|
|
" 'Longtitude', 'Regionname', 'Propertycount'],\n",
|
|
|
|
" dtype='object')"
|
|
|
|
]
|
|
|
|
},
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 75,
|
2025-01-23 10:44:22 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"melbourne_file_path = 'data/melb_data.csv'\n",
|
|
|
|
"melbourne_data = pd.read_csv(melbourne_file_path)\n",
|
|
|
|
"melbourne_data = melbourne_data.dropna(axis=0) # entfernen von Daten mit fehlenden Werten\n",
|
|
|
|
"melbourne_data.columns # Spaltennamen der Tabelle (potentielle Features)\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 83,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "dac65c52-f2ce-47b6-ba65-3c6bd915dfe8",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
2025-01-30 10:07:11 +01:00
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"/home/plex/.pyenv/versions/ki/lib/python3.12/site-packages/pandas/core/nanops.py:1016: RuntimeWarning: invalid value encountered in subtract\n",
|
|
|
|
" sqr = _ensure_numeric((avg - values) ** 2)\n"
|
|
|
|
]
|
|
|
|
},
|
2025-01-23 10:44:22 +01:00
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>Rooms</th>\n",
|
|
|
|
" <th>BuildingArea</th>\n",
|
|
|
|
" <th>Lattitude</th>\n",
|
|
|
|
" <th>Price</th>\n",
|
|
|
|
" <th>price_per_area</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>79.0</td>\n",
|
|
|
|
" <td>-37.8079</td>\n",
|
|
|
|
" <td>1035000.0</td>\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
" <td>6634.615385</td>\n",
|
2025-01-23 10:44:22 +01:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>150.0</td>\n",
|
|
|
|
" <td>-37.8093</td>\n",
|
|
|
|
" <td>1465000.0</td>\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
" <td>10932.835821</td>\n",
|
2025-01-23 10:44:22 +01:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>4</th>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>142.0</td>\n",
|
|
|
|
" <td>-37.8072</td>\n",
|
|
|
|
" <td>1600000.0</td>\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
" <td>13333.333333</td>\n",
|
2025-01-23 10:44:22 +01:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>6</th>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>210.0</td>\n",
|
|
|
|
" <td>-37.8024</td>\n",
|
|
|
|
" <td>1876000.0</td>\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
" <td>7657.142857</td>\n",
|
2025-01-23 10:44:22 +01:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>7</th>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>107.0</td>\n",
|
|
|
|
" <td>-37.8060</td>\n",
|
|
|
|
" <td>1636000.0</td>\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
" <td>6390.625000</td>\n",
|
2025-01-23 10:44:22 +01:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" Rooms BuildingArea Lattitude Price price_per_area\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
"1 2 79.0 -37.8079 1035000.0 6634.615385\n",
|
|
|
|
"2 3 150.0 -37.8093 1465000.0 10932.835821\n",
|
|
|
|
"4 4 142.0 -37.8072 1600000.0 13333.333333\n",
|
|
|
|
"6 3 210.0 -37.8024 1876000.0 7657.142857\n",
|
|
|
|
"7 2 107.0 -37.8060 1636000.0 6390.625000"
|
2025-01-23 10:44:22 +01:00
|
|
|
]
|
|
|
|
},
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 83,
|
2025-01-23 10:44:22 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"#features = ['BuildingArea', 'Rooms', 'Bathroom', 'Landsize', 'Lattitude', 'Longtitude', 'YearBuilt', 'Distance']\n",
|
|
|
|
"features = ['Rooms', 'BuildingArea', 'Lattitude']\n",
|
|
|
|
"data = melbourne_data[features + ['Price']]\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
"data = data.assign(price_per_area = melbourne_data['Price'] / melbourne_data[\"Landsize\"])\n",
|
2025-01-23 10:44:22 +01:00
|
|
|
"data.describe()\n",
|
|
|
|
"data.head()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 77,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "afe5a08a-abec-4164-85c4-1d3ac8398a62",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def h(x, w):\n",
|
|
|
|
" \"\"\"x und w sind numpy arrays; x kann auch die komplette Feature-Matrix sein\"\"\"\n",
|
|
|
|
" # Diese Form erlaubt es für x eine ganze (Feature-)Matrix zu übergeben. Die Matrix enthält\n",
|
|
|
|
" # zeilenweise je einen Datenpunkt, für den h berechnet werden soll.\n",
|
|
|
|
"    # x @ w ist dann ein Vektor mit je einem Ergebnis in den Komponenten des Vektors pro Zeile\n",
|
|
|
|
" # der übergebenen (Feature-)Matrix.\n",
|
|
|
|
" return x @ w\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 78,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "c2e256a3-3575-45c4-a99c-d41c3c56e1c3",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Definition der Kostenfunktion\n",
|
|
|
|
"def J(w, X, y):\n",
|
|
|
|
" \"\"\"\n",
|
|
|
|
" w, X, y müssen numpy arrays sein\n",
|
|
|
|
" X: Feature-Matrix aller Trainingsdaten inkl. Spalte mit 1; Dimension: n x (d+1)\n",
|
|
|
|
" y: Vektor aller Targets zu X\n",
|
|
|
|
" \"\"\"\n",
|
|
|
|
" errors = y - h(x=X, w=w)\n",
|
|
|
|
" mse = 1.0/(2.0*len(y)) * ( errors @ errors )\n",
|
|
|
|
" return mse\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 79,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "b41b9c03-0c1d-4a6b-80e8-d7e8775b69c0",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def feature_matrix_from_data(data):\n",
|
|
|
|
" # hier erzeugen wir die Matrix mit unseren Input-Daten (Features) inklusive der Spalte mit \"1\"\n",
|
|
|
|
" return np.hstack((np.ones((len(data),1)), data.to_numpy(copy=True)))\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 80,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "2c631e17-eb36-43d0-97b1-59add1c93dd9",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# hier erzeugen wir die Matrix mit unseren Input-Daten (Features) inklusive der Spalte mit \"1\"\n",
|
|
|
|
"#X = np.hstack((np.ones((len(data),1)), data[features].to_numpy(copy=True)))\n",
|
|
|
|
"X = feature_matrix_from_data(data[features])\n",
|
|
|
|
"# und ausserdem den Vektor der Targets\n",
|
|
|
|
"y = data.Price.to_numpy(copy=True)\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2025-01-30 10:07:11 +01:00
|
|
|
"execution_count": 81,
|
2025-01-23 10:44:22 +01:00
|
|
|
"id": "8301eb74-9aae-446c-ad46-924811b99777",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"Die 4 Parameter der linearen Regression:\n",
|
|
|
|
"[-6.97461781e+07 2.41559504e+05 2.31456611e+03 -1.84562537e+06]\n",
|
|
|
|
"Kostenfunktion J(w_ana): 137899453867.5851\n",
|
2025-01-30 10:07:11 +01:00
|
|
|
"CPU times: user 495 μs, sys: 39 μs, total: 534 μs\n",
|
|
|
|
"Wall time: 553 μs\n"
|
2025-01-23 10:44:22 +01:00
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"%%time\n",
|
|
|
|
"w_ana = np.linalg.solve(X.T @ X, X.T @ y)\n",
|
|
|
|
"print('Die {} Parameter der linearen Regression:\\n{}'.format(len(w_ana), w_ana))\n",
|
|
|
|
"J_ana = J(w=w_ana, X=X, y=y)\n",
|
|
|
|
"print('Kostenfunktion J(w_ana): {}'.format(J_ana))\n"
|
|
|
|
]
|
2025-01-30 10:07:11 +01:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 91,
|
|
|
|
"id": "a89c591c-4ebd-4a3d-958b-915738d7361a",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"1 6634.615385\n",
|
|
|
|
"2 10932.835821\n",
|
|
|
|
"4 13333.333333\n",
|
|
|
|
"6 7657.142857\n",
|
|
|
|
"7 6390.625000\n",
|
|
|
|
"Name: price_per_area, dtype: float64\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"<Axes: xlabel='BuildingArea', ylabel='Price'>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 91,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHACAYAAACMB0PKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAPKNJREFUeJzt3Xl8VPW9x//3LJmZbJNgIpsGiQZFNkFQSgPcqvxARFxKq5eiDwRvvSqo1N4WcKFaF8Cl17rU1gXUX1Vaq9CK+wWVxQ3ZEQSiFPjJGiCZrDOZmfP7AzOeyYSEJcw5ZF7PxyOPR+Z8T858MkMe8+a7HYdhGIYAAABsyGl1AQAAAIdCUAEAALZFUAEAALZFUAEAALZFUAEAALZFUAEAALZFUAEAALZFUAEAALZFUAEAALZFUAEAALbVaoLKokWLNHLkSHXs2FEOh0Pz5s074msYhqFHHnlEZ555prxer0455RQ98MADLV8sAAA4LG6rC2gpVVVVOuecczR+/Hj99Kc/Papr3HbbbXr//ff1yCOPqGfPntq/f7/279/fwpUCAIDD5WiNNyV0OByaO3eurrjiitixYDCoO++8U6+++qrKysrUo0cPzZw5Uz/5yU8kSRs2bFCvXr20bt06nXXWWdYUDgAA4rSaoZ/mTJw4UZ9++qnmzJmjNWvW6Oc//7kuvvhibd68WZL05ptv6vTTT9f8+fNVWFiozp0767/+67/oUQEAwEIpEVS2bdum2bNn67XXXtOgQYN0xhln6H/+5380cOBAzZ49W5L07bffauvWrXrttdf00ksv6YUXXtDy5cv1s5/9zOLqAQBIXa1mjkpT1q5dq0gkojPPPDPueDAYVF5eniQpGo0qGAzqpZdeip33/PPPq2/fvtq4cSPDQQAAWCAlgkplZaVcLpeWL18ul8sV15aVlSVJ6tChg9xud1yYOfvssyUd7JEhqAAAkHwpEVT69OmjSCSiPXv2aNCgQY2eU1xcrHA4rG+++UZnnHGGJGnTpk2SpNNOOy1ptQIAgB+0mlU/lZWVKikpkXQwmPzhD3/QBRdcoJNOOkmdOnXSNddco6VLl+rRRx9Vnz59tHfvXi1YsEC9evXSiBEjFI1Gdd555ykrK0uPPfaYotGoJkyYIL/fr/fff9/i3w4AgNTUaoLKRx99pAsuuCDh+NixY/XCCy+orq5O999/v1566SV99913ys/P149+9CPde++96tmzpyRpx44duuWWW/T+++8rMzNTw4cP16OPPqqTTjop2b8OAABQKwoqAACg9UmJ5ckAAODERFABAAC2dUKv+olGo9qxY4eys7PlcDisLgcAABwGwzBUUVGhjh07yulsus/khA4qO3bsUEFBgdVlAACAo7B9+3adeuqpTZ5zQgeV7OxsSQd/Ub/fb3E1AADgcAQCARUUFMQ+x5tyQgeV+uEev99PUAEA4ARzONM2mEwLAABsi6ACAABsi6ACAABsi6ACAABsi6ACAABsi6ACAABsi6ACAABsi6ACAABsi6ACAABsi6ACAABs64TeQv94Ka8OqbQypEBtnfzpacrP9Cgnw2N1WQAApByCSgM7ymo0+fU1Wry5NHZscJd8zRjVSx1z0y2sDACA1MPQj0l5dSghpEjSos2lmvL6GpVXhyyqDACA1ERQMSmtDCWElHqLNpeqtJKgAgBAMhFUTAK1dU22VzTTDgAAWhZBxcTvS2uyPbuZdgAA0LIIKib5WR4N7pLfaNvgLvnKz2LlDwAAyURQMcnJ8GjGqF4JYWVwl3zNHNWLJcoAACQZy5Mb6JibridG91FpZUgVtXXK9qUpP4t9VAAAsAJBpRE5GQQTAADsgKEfAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWw
QVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgWwQVAABgW5YGlUgkorvvvluFhYVKT0/XGWecofvuu0+GYVhZlsqrQ/pmT6VWbjugb/ZWqrw6ZGk9AACkKreVTz5z5kw9/fTTevHFF9W9e3d9+eWXGjdunHJycnTrrbdaUtOOshpNfn2NFm8ujR0b3CVfM0b1UsfcdEtqAgAgVVnao/LJJ5/o8ssv14gRI9S5c2f97Gc/09ChQ/XFF19YUk95dSghpEjSos2lmvL6GnpWAABIMkuDyo9//GMtWLBAmzZtkiStXr1aS5Ys0fDhwxs9PxgMKhAIxH21pNLKUEJIqbdoc6lKKwkqAAAkk6VDP1OmTFEgEFDXrl3lcrkUiUT0wAMPaMyYMY2eP336dN17773HrZ5AbV2T7RXNtAMAgJZlaY/K3//+d7388st65ZVXtGLFCr344ot65JFH9OKLLzZ6/tSpU1VeXh772r59e4vW4/elNdme3Uw7AABoWZb2qPzmN7/RlClT9J//+Z+SpJ49e2rr1q2aPn26xo4dm3C+1+uV1+s9bvXkZ3k0uEu+FjUy/DO4S77yszzH7bkBAEAiS3tUqqur5XTGl+ByuRSNRi2pJyfDoxmjemlwl/y444O75GvmqF7KySCoAACQTJb2qIwcOVIPPPCAOnXqpO7du2vlypX6wx/+oPHjx1tWU8fcdD0xuo9KK0OqqK1Tti9N+VkeQgoAABZwGBburlZRUaG7775bc+fO1Z49e9SxY0eNHj1a06ZNk8fTfDAIBALKyclReXm5/H5/EioGAADH6kg+vy0NKseKoAIAwInnSD6/udcPAACwLYIKAACwLUsn09pVeXVIpZUhBWrr5E9PU34mk2kBALACQaUBbkoIAIB9MPRjwk0JAQCwF4KKCTclBADAXggqJtyUEAAAeyGomHBTQgAA7IWgYlJ/U8LGcFNCAACSj6Biwk0JAQCwF5YnN8BNCQEAsA+CSiNyMggmAADYAUM/AADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAtggqAADAttxWF2BH5dUhlVaGFKitkz89TfmZHuVkeKwuCwCAlENQaWBHWY0mv75GizeXxo4N7pKvGaN6qWNuuoWVAQCQehj6MSmvDiWEFElatLlUU15fo/LqkEWVAQCQmggqJqWVoYSQUm/R5lKVVhJUAABIJoKKSaC2rsn2imbaAQBAyyKomPh9aU22ZzfTDgAAWhZBxSQ/y6PBXfIbbRvcJV/5Waz8AQAgmQgqJjkZHs0Y1SshrAzukq+Zo3qxRBkAgCRjeXIDHXPT9cToPiqtDKmitk7ZvjTlZ7GPCgAAViCoNCIng2ACAIAdEFQawc60AADYA0GlAXamBQDAPphMa8LOtAAA2AtBxYSdaQEAsBeCigk70wIAYC8EFRN2pgUAwF4IKibsTAsAgL0QVEzYmRYAAHuxfHnyd999p8mTJ+udd95RdXW1ioqKNHv2bPXr18+SetiZFgAA+7A0qBw4cEDFxcW64IIL9M477+jkk0/W5s2b1aZNGyvLYmdaAABswtKgMnPmTBUUFGj27NmxY4WFhRZWBAAA7MTSOSr/+te/1K9fP/385z9X27Zt1adPHz377LNWlgQAAGzE0qDy7bff6umnn1aXLl303nvv6aabbtKtt96qF198sdHzg8GgAoFA3BcAAGi9HIZhGFY9ucfjUb9+/fTJJ5/Ejt16661atmyZPv3004Tz77nnHt
17770Jx8vLy+X3+49rrQAAoGUEAgHl5OQc1ue3pT0qHTp0ULdu3eKOnX322dq2bVuj50+dOlXl5eWxr+3btyejTAA
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"ax = sns.scatterplot(x=data['BuildingArea'], y=data['Price'])\n",
|
|
|
|
"\n",
|
|
|
|
"xplot = data['price_per_area']\n",
|
|
|
|
"yplot = data['price_per_area'][0:len(xplot)]\n",
|
|
|
|
"print(yplot.head())\n",
|
|
|
|
"sns.lineplot(x=xplot, y=yplot, ax=ax)\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 85,
|
|
|
|
"id": "e4fc1d51-3435-415d-875f-db85dfda39e2",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"1 6634.615385\n",
|
|
|
|
"2 10932.835821\n",
|
|
|
|
"4 13333.333333\n",
|
|
|
|
"6 7657.142857\n",
|
|
|
|
"7 6390.625000\n",
|
|
|
|
"Name: price_per_area, dtype: float64"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 85,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"data['price_per_area'].head()"
|
|
|
|
]
|
2025-01-23 10:44:22 +01:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python (ki)",
|
|
|
|
"language": "python",
|
|
|
|
"name": "myenv"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.12.8"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 5
|
|
|
|
}
|