{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "(GLM-out-of-sample-predictions)=\n", "# Out-Of-Sample Predictions\n", "\n", ":::{post} December, 2022\n", ":tags: generalized linear model, logistic regression, out of sample predictions, patsy\n", ":category: beginner\n", ":::" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import patsy\n", "import pymc as pm\n", "import seaborn as sns\n", "\n", "from scipy.special import expit as inverse_logit\n", "from sklearn.metrics import RocCurveDisplay, accuracy_score, auc, roc_curve\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "RANDOM_SEED = 8927\n", "rng = np.random.default_rng(RANDOM_SEED)\n", "az.style.use(\"arviz-darkgrid\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Generate Sample Data\n", "\n", "We want to fit a logistic regression model where there is a multiplicative interaction between two numerical features." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
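<table><thead><tr><th></th><th>x1</th><th>x2</th><th>y</th></tr></thead><tbody>
<tr><th>0</th><td>-0.445284</td><td>1.381325</td><td>0</td></tr>
<tr><th>1</th><td>2.651317</td><td>0.800736</td><td>1</td></tr>
<tr><th>2</th><td>-1.141940</td><td>-0.128204</td><td>0</td></tr>
<tr><th>3</th><td>1.336498</td><td>-0.931965</td><td>0</td></tr>
<tr><th>4</th><td>2.290762</td><td>3.400222</td><td>1</td></tr></tbody></table>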
\n", "