{ "cells": [ { "cell_type": "code", "execution_count": 23, "id": "0c067959-88f2-4579-ba5f-b4fe8a3b81fe", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "# plotting settings\n", "pd.plotting.register_matplotlib_converters()\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import seaborn as sns\n", "import sklearn\n", "from sklearn.linear_model import LinearRegression,Ridge\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler, PolynomialFeatures" ] }, { "cell_type": "code", "execution_count": 4, "id": "f068fcd9-6023-4f40-87fe-3323ef241ab1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BuildingAreaRoomsPrice
179.021035000.0
2150.031465000.0
4142.041600000.0
6210.031876000.0
7107.021636000.0
\n", "
" ], "text/plain": [ " BuildingArea Rooms Price\n", "1 79.0 2 1035000.0\n", "2 150.0 3 1465000.0\n", "4 142.0 4 1600000.0\n", "6 210.0 3 1876000.0\n", "7 107.0 2 1636000.0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the Melbourne housing data and drop every row that has a missing value.\n", "melbourne_file_path = 'data/melb_data.csv'\n", "melbourne_data = pd.read_csv(melbourne_file_path).dropna(axis=0)\n", "# For this example keep only a small excerpt of the data (the same one as in the first example):\n", "# the first `max_datapoints` rows whose building area is below `max_area`.\n", "max_area = 400\n", "max_datapoints = 10\n", "data = (\n", "    melbourne_data\n", "    .loc[melbourne_data['BuildingArea'] < max_area, ['BuildingArea', 'Rooms', 'Price']]\n", "    .head(max_datapoints)\n", ")\n", "data.head()\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "d09ea079-2ec7-4e6c-ba46-497874008564", "metadata": {}, "outputs": [], "source": [ "# Single-column feature matrix (building area) and target vector (price).\n", "x = data['BuildingArea'].to_numpy(copy=True).reshape(-1, 1)\n", "y = data['Price'].to_numpy(copy=True)" ] }, { "cell_type": "code", "execution_count": 24, "id": "41a5faf5-33a0-4b71-bb3c-7255d09602a4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
PolynomialFeatures(degree=4)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "PolynomialFeatures(degree=4)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Stand-alone polynomial feature transformer of degree 4, shown only for inspection.\n", "pfeats = PolynomialFeatures(degree=4)\n", "pfeats" ] }, { "cell_type": "code", "execution_count": 27, "id": "fbb2ddae-a717-41be-8eb5-c777f60344c9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('polynomialfeatures',\n",
       "                 PolynomialFeatures(degree=4, include_bias=False)),\n",
       "                ('standardscaler', StandardScaler()),\n",
       "                ('linearregression', LinearRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('polynomialfeatures',\n", " PolynomialFeatures(degree=4, include_bias=False)),\n", " ('standardscaler', StandardScaler()),\n", " ('linearregression', LinearRegression())])" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline = make_pipeline(PolynomialFeatures(4,include_bias=False), StandardScaler(), LinearRegression())\n", "pipeline" ] }, { "cell_type": "code", "execution_count": null, "id": "3daa0a07-5e9f-49b8-a3b2-ca33ae47bd79", "metadata": {}, "outputs": [], "source": [ "# Fit all pipeline steps (polynomial expansion -> scaling -> linear regression) on the data.\n", "pipeline.fit(x, y)\n", "# The fitted parameters live on the final step of the pipeline. No variable named `model`\n", "# is defined in this notebook, so the regressor is looked up explicitly.\n", "linreg = pipeline.named_steps['linearregression']\n", "# print model parameters (one coefficient per standardized polynomial feature)\n", "print('w0: {}'.format(linreg.intercept_))\n", "print('w1: {}'.format(linreg.coef_))\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ce945e46-d16a-47dd-ba9c-6bf3920cf8c5", "metadata": {}, "outputs": [], "source": [ "# Score through the pipeline so that x passes through the same transformations as during fit.\n", "r_sq = pipeline.score(x, y)\n", "print('erklärte Varianz (R^2): {}'.format(r_sq))\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python (ki)", "language": "python", "name": "myenv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.1" } }, "nbformat": 4, "nbformat_minor": 5 }