ml-course/sample_project/introduction-to-pandas.ipynb

5915 lines
208 KiB
Plaintext
Raw Normal View History

2022-08-13 19:02:12 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "d0b99ebe",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "18b71e0d",
"metadata": {},
"outputs": [],
"source": [
"# pandas has 2 main datatypes: Series and DataFrame\n",
"series = pd.Series([\"BMW\", \"Toyota\", \"Honda\"])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5fd5c8ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 BMW\n",
"1 Toyota\n",
"2 Honda\n",
"dtype: object"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8293df83",
"metadata": {},
"outputs": [],
"source": [
"# series = 1-dimensional"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7ce01316",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Red\n",
"1 Blue\n",
"2 White\n",
"dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"colours = pd.Series([\"Red\", \"Blue\", \"White\"])\n",
"colours"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3df244ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Car make</th>\n",
" <th>Colur</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BMW</td>\n",
" <td>Red</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Honda</td>\n",
" <td>White</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Car make Colur\n",
"0 BMW Red\n",
"1 Toyota Blue\n",
"2 Honda White"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame = 2-dimensional\n",
"car_data = pd.DataFrame({\"Car make\": series, \"Colur\": colours})\n",
"car_data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6269eb7",
"metadata": {},
"outputs": [],
"source": [
"# Import data\n",
"car_sales = pd.read_csv(\"car-sales.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5ed55160",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "118c9363",
"metadata": {},
"outputs": [],
"source": [
"# Exporting a dataframe\n",
"# car_sales.to_csv(\"exported.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"id": "0be3c88e",
"metadata": {},
"source": [
"## Describe data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f6ae0796",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make object\n",
"Colour object\n",
"Odometer (KM) int64\n",
"Doors int64\n",
"Price object\n",
"dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attribute (accessed without parentheses)\n",
"car_sales.dtypes\n",
"\n",
"# Function (called with parentheses)\n",
"#car_sales.to_csv()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d55320ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.columns"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "92b983d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_columns = car_sales.columns\n",
"car_columns"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "92937e49",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=10, step=1)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.index"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "922a7259",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "f46a652c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>10.000000</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>78601.400000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>61983.471735</td>\n",
" <td>0.471405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>11179.000000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>35836.250000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>57369.000000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>96384.500000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>213095.000000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Odometer (KM) Doors\n",
"count 10.000000 10.000000\n",
"mean 78601.400000 4.000000\n",
"std 61983.471735 0.471405\n",
"min 11179.000000 3.000000\n",
"25% 35836.250000 4.000000\n",
"50% 57369.000000 4.000000\n",
"75% 96384.500000 4.000000\n",
"max 213095.000000 5.000000"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.describe()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "79387319",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10 entries, 0 to 9\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Make 10 non-null object\n",
" 1 Colour 10 non-null object\n",
" 2 Odometer (KM) 10 non-null int64 \n",
" 3 Doors 10 non-null int64 \n",
" 4 Price 10 non-null object\n",
"dtypes: int64(2), object(3)\n",
"memory usage: 528.0+ bytes\n"
]
}
],
"source": [
"car_sales.info()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "cbfd8da3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Odometer (KM) 78601.4\n",
"Doors 4.0\n",
"dtype: float64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Average only the numeric columns: Make, Colour and Price are object (string)\n",
"# columns, and newer pandas raises TypeError instead of silently skipping them.\n",
"car_sales.mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "73ea13e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"376500.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_prices = pd.Series([3000, 1500, 1125000])\n",
"car_prices.mean()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "8b05884d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make ToyotaHondaToyotaBMWNissanToyotaHondaHondaToyo...\n",
"Colour WhiteRedBlueBlackWhiteGreenBlueBlueWhiteWhite\n",
"Odometer (KM) 786014\n",
"Doors 40\n",
"Price $4,000.00$5,000.00$7,000.00$22,000.00$3,500.00...\n",
"dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.sum()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "4ddbed66",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"40"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Doors\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "0fdb1df3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(car_sales)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "72af5003",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "markdown",
"id": "33cfa487",
"metadata": {},
"source": [
"## Viewing and selecting data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "23567f48",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "fe1ea0d4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.head(7)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "a05981ea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.tail()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d3f41528",
"metadata": {},
"outputs": [],
"source": [
"# .loc & .iloc\n",
"animals = pd.Series([\"cat\", \"dog\", \"bird\", \"panda\", \"snake\"],\n",
" index=[0,3, 9, 8, 3])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "b849ece1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 cat\n",
"3 dog\n",
"9 bird\n",
"8 panda\n",
"3 snake\n",
"dtype: object"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7aaabb07",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3 dog\n",
"3 snake\n",
"dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals.loc[3]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d3305a05",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'bird'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals.loc[9]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bc0c43a5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make BMW\n",
"Colour Black\n",
"Odometer (KM) 11179\n",
"Doors 5\n",
"Price $22,000.00\n",
"Name: 3, dtype: object"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .loc refers to index label (returns every row carrying that label)\n",
"car_sales.loc[3]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "c0600348",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'panda'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .iloc refers to position\n",
"animals.iloc[3]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f77b2a57",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 cat\n",
"3 dog\n",
"9 bird\n",
"8 panda\n",
"3 snake\n",
"dtype: object"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "99050e3c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 cat\n",
"3 dog\n",
"9 bird\n",
"dtype: object"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals.iloc[:3]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "a9e018ad",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.loc[:3]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "cedd32fb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "1d2be05c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Toyota\n",
"1 Honda\n",
"2 Toyota\n",
"3 BMW\n",
"4 Nissan\n",
"5 Toyota\n",
"6 Honda\n",
"7 Honda\n",
"8 Toyota\n",
"9 Nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Make\"]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "4962a1fc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 White\n",
"1 Red\n",
"2 Blue\n",
"3 Black\n",
"4 White\n",
"5 Green\n",
"6 Blue\n",
"7 Blue\n",
"8 White\n",
"9 White\n",
"Name: Colour, dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Colour\"]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "d4043529",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Toyota\n",
"1 Honda\n",
"2 Toyota\n",
"3 BMW\n",
"4 Nissan\n",
"5 Toyota\n",
"6 Honda\n",
"7 Honda\n",
"8 Toyota\n",
"9 Nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Make\"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7acbc784",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Toyota\n",
"1 Honda\n",
"2 Toyota\n",
"3 BMW\n",
"4 Nissan\n",
"5 Toyota\n",
"6 Honda\n",
"7 Honda\n",
"8 Toyota\n",
"9 Nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.Make"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "f6d2bca3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"8 Toyota White 60000 4 $6,250.00"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[car_sales[\"Make\"] == \"Toyota\"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "a9bbcefc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"4 Nissan White 213095 4 $3,500.00"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[car_sales[\"Odometer (KM)\"] > 100000]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "b2a8ee80",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Doors</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Make</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>BMW</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Honda</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nissan</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Toyota</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Doors 3 4 5\n",
"Make \n",
"BMW 0 0 1\n",
"Honda 0 3 0\n",
"Nissan 0 2 0\n",
"Toyota 1 3 0"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.crosstab(car_sales[\"Make\"], car_sales[\"Doors\"])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "aa0d76c3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Make</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>BMW</th>\n",
" <td>11179.000000</td>\n",
" <td>5.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Honda</th>\n",
" <td>62778.333333</td>\n",
" <td>4.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nissan</th>\n",
" <td>122347.500000</td>\n",
" <td>4.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Toyota</th>\n",
" <td>85451.250000</td>\n",
" <td>3.75</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Odometer (KM) Doors\n",
"Make \n",
"BMW 11179.000000 5.00\n",
"Honda 62778.333333 4.00\n",
"Nissan 122347.500000 4.00\n",
"Toyota 85451.250000 3.75"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Groupby\n",
"car_sales.groupby([\"Make\"]).mean()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "52390ea4",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "e7fdeb36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAD4CAYAAADy46FuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA2HUlEQVR4nO3deXxU9bn48c+TnSwEsoIJEEICBEFRIqK4gwpqRay22Fa593qLtVpb23td67Wtdb1Vr9alpcp1+bVWRa3e1o1FRVHRuLEImYRFCZBJICyTAFmf3x9zhg4YJoEsZ2byvF+veWXyPed75sm8kjxzvquoKsYYY8zBxLgdgDHGmPBmicIYY0xIliiMMcaEZInCGGNMSJYojDHGhBTndgDdLSsrSwsKCtwOwxhjIsonn3yyVVWz2zsWdYmioKCAsrIyt8MwxpiIIiJfHeyYNT0ZY4wJyRKFMcaYkCxRGGOMCckShTHGmJAsURhjjAnJEoUxxpiQLFEYY4wJyRKFMZ2wbN02Vm7a6XYYxrjCEoUxnXDdC8u5/oXlbodhjCuibma2Md1td1MLX23bDcCWnXsYnN7P5YiM6V12R2FMByq89fueL1xd42IkxrjDEoUxHSj3+gBIS4xj0Wqvy9EY0/ssURjTAU+1j8S4GL49IZ/3K7fR0NjidkjG9CpLFMZ0wFNTT3FuKmcfOYim1jberah1OyRjepUlCmM64Kn2MTI3jdKCgfRPirN+CtPnWKIwJoSdu5up3rWXUblpxMfGcProHBavqaG1Td0OzZheY4nCmBA8Nf6O7JG5aQBMLcmlrqGJz77e7mZYxvQqSxTGhFBe7SSKQf5EceqobOJixJqfTJ/SYaIQkSEi8paIrBaRVSLyU6c8Q0QWiEiF83VgUJ0bRaRSRMpF5Oyg8gkissI59qCIiFOeKCLPOuXLRKQgqM5s5zUqRGR2t/70xnSgwusjNTGOI9KTAOifFM/xhRkstGGypg/pzB1FC/ALVS0BJgFXicgY4AZgkaoWA4uc73GOzQKOBKYBj4hIrHOtR4E5QLHzmOaUXw5sV9Ui4H7gbudaGcCtwPHARODW4IRkTE8r9/oYmZuK85kG8Dc/VdbUs2Frg4uRGdN7OkwUqrpFVT91nvuA1UAeMAN40jntSeAC5/kM4K+q2qiq64FKYKKIDAb6q+oHqqrAUwfUCVxrPjDFuds4G1igqnWquh1YwD+TizE9SlUpr/Yxyml2CphakgtgdxWmzzikPgqnSegYYBmQq6pbwJ9MgBzntDxgY1C1Kqcsz3l+YPl+dVS1BdgJZIa41oFxzRGRMhEpq621Me6me2ytb2L77maKc/ZPFEMykhmVm2aJwvQZnU4UIpIKvAD8TFV3hTq1nTINUX64df5ZoDpXVUtVtTQ7OztEaMZ0nsdZuuPAOwqAqWNy+HjDdnbubu7tsIzpdZ1KFCISjz9J/FlVX3SKvU5zEs7XwDCQKmBIUPV8YLNTnt9O+X51RCQOSAfqQlzLmB4XSBSBobHBppTk0tqmvO2x0U8m+nVm1JMAjwOrVfW+oEOvAIFRSLOBl4PKZzkjmYbj77T+yGme8onIJOealx1QJ3Cti4DFTj/GG8BZIjLQ6cQ+yykzpsd5vD4yUhLISk34xrHx+QPISk1gwZfW/GSiX2f2o5gMXAqsEJHPnbKbgLuA50TkcuBr4GIAVV0lIs8BX+IfMXWVqrY69a4EngD6Aa85D/AnoqdFpBL/ncQs51p1InIb8LFz3m9Ute7wflRjDk159TdHPAXExAhTRufy6sotNLW0kRBnU5JM9OowUajqe7TfVwAw5SB1bgdub6e8DBjbTvlenETTzrF5wLyO4jSmO6kqHm89Fx77jbET+0wpyeHZso18vKGOyUVZvRidMb3LPgYZ047NO/dS39jSbv9EwEnFWSTGxVjzk4l6liiMaUeoEU8ByQlxnFSUxaI1XvxdasZEJ0sUxrTDE1jjKefgiQ
L8o5821u3BE7RdqjHRxhKFMe0o9/oY1D+J9OT4kOdNKfHPM7XJdyaaWaIwph0er4/i3NQOz8vtn8RR+emWKExUs0RhzAFa25QKbz2jQnRkB5taksvnG3dQ49vbw5EZ4w5LFMYcYGPdbhpb2vbtQdGRqSW5qMJba2yWtolOliiMOUB5YMRTJ+8oSgancUR6km1mZKKWJQpjDhAY8VSU03EfBYCIMHVMLu9W1LK3ubXjCsZEGEsUxhyg3OtjSEY/UhI7s8KN39SSXPY2t7G0cmsPRmaMOyxRGHMAj9fX6WangOMLM0hJiLXmJxOVLFEYE6SppY11tQ0hl+5oT2JcLKeOymbRai9tbTZL20QXSxTGBNmwrYGWNg25dMfBTC3JpcbXyMrNO3sgMmPcY4nCmCDlTkf2gdufdsbpo3KIEVhoiwSaKGOJwpggHq+P2BihMDvlkOsOTEmgdFgGC6yfwkSZzuxwN09EakRkZVDZsyLyufPYENjQSEQKRGRP0LE/BNWZICIrRKRSRB50drnD2QnvWad8mYgUBNWZLSIVzmM2xvSw8mofBZnJJMXHHlb9qWNyWL1lF5t27OnmyIxxT2fuKJ4ApgUXqOp3VXW8qo7Hv5f2i0GH1waOqeqPgsofBebg3xq1OOialwPbVbUIuB+4G0BEMoBbgeOBicCtznaoxvSYipr6w+qfCJhSkgvAIlv7yUSRDhOFqi7Bvz3pNzh3Bd8Bngl1DREZDPRX1Q+cvbCfAi5wDs8AnnSezwemONc9G1igqnWquh1YwAEJy5jutLe5lQ3bDn3EU7AR2akUZqXYZkYmqnS1j+JkwKuqFUFlw0XkMxF5R0ROdsrygKqgc6qcssCxjQCq2gLsBDKDy9upY0y3q6ypR5UuJQqAqWNy+XDdNnx7m7spMmPc1dVEcQn7301sAYaq6jHAz4G/iEh/2t9zOzDY/GDHQtXZj4jMEZEyESmrra3tdPDGBAuMeOpqopgyOofmVuXdCpulbaLDYScKEYkDLgSeDZSpaqOqbnOefwKsBUbivxvID6qeD2x2nlcBQ4KumY6/qWtfeTt19qOqc1W1VFVLs7OzD/dHMn2cx+sjITaGgszkLl1nwrCBDEiOt2GyJmp05Y5iKrBGVfc1KYlItojEOs8L8Xdar1PVLYBPRCY5/Q+XAS871V4BAiOaLgIWO/0YbwBnichApxP7LKfMmB7h8foYkZNKXGzXbrTjYmM4Y1QOb5XX0NLa1k3RGeOezgyPfQb4ABglIlUicrlzaBbf7MQ+BVguIl/g75j+kaoGOsKvBB4DKvHfabzmlD8OZIpIJf7mqhsAnHq3AR87j98EXcuYbufx1jOqE7vadcaUkly2727m0693dMv1jHFTh8tjquolByn/l3bKXsA/XLa988uAse2U7wUuPkidecC8jmI0pqt8e5vZtGMP38sd2i3XO2VkFvGxwsLVXiYOz+iWaxrjFpuZbQz+uwno/GZFHUlLimdSYabtpW2igiUKY/D3TwBdmmx3oKkluayrbWBtbX23XdMYN1iiMAZ/okhOiCVvQL9uu+aUkhzAZmmbyGeJwhj8iaI4N42YmPam7xye/IHJlAzub5sZmYhnicIYoLy6npGd3CP7UEwtyaFsQx3bG5q6/drG9BZLFKbP21bfyNb6xm7tnwiYWpJLm8LbHrurMJHLEoXp8wIjnrq6dEd7xuWlk5OWyMIvLVGYyGWJwvR5FTXdP+IpICZGmFKSwzueWppabJa2iUyWKEyfV17tI71fPDlpiT1y/akludQ3trBs/bYeub4xPc0ShenzPF4fI3NTcTZd7HaTi7JIio+xRQJNxLJEYfo0VaW82tcj/RMBSfGxnFSUzcLVNfjXuzQmsliiMH2ad1cju/a29Ej/RLAzx+Swacce1jh7XhgTSSxRmD4tsHRHT95RAJwxOhcRrPnJRCRLFKZP661EkZ2WyNH5A2yRQBORLFGYPq282kdWaiIZKQk9/lpnjsnli6
qd1Oza2+OvZUx3skRh+jSP18eoQd2/dEd7ppbkArBojU2+M5GlMzvczRORGhFZGVT2KxHZJCKfO49zgo7dKCKVIlI
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"car_sales[\"Odometer (KM)\"].plot()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "fc72e078",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAATAklEQVR4nO3dcayddX3H8fdX2k3GxYKWXZuCFrduiaOJ0htgcZp7l+lKZSPb+ANCQNhMg4NEt5JQNUH3xxJ0wUyCsXaROBbm1QXdCNQ4Q7giW5i0rLQUZHasyUq7EjErXml01e/+OE/Xw+k59zzn3uece/zt/UpO7jnP8zvP+dzfefj03Oc85xCZiSTpZ99rljuAJKkZFrokFcJCl6RCWOiSVAgLXZIKsWK5Hnj16tW5bt265Xr40/zwhz/krLPOWu4YpzFXfeOYCcw1iHHMBOOVa/fu3d/LzPO6rszMZbls3Lgxx8kjjzyy3BG6Mld945gp01yDGMdMmeOVC9iVPXrVQy6SVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEH0LPSJeGxHfjoinImJ/RPxZlzEREXdFxIGI2BsRFw8nriSplzofLPoR8JuZOR8RK4HHIuJrmfl425jLgfXV5VLgs9VPSdKI9H2FXp3LPl/dXFldOr9E/Urg3mrs48A5EbGm2aiSpIVE1vgfXETEGcBu4JeBz2TmbR3rHwTuyMzHqtsPA7dl5q6OcVuALQCTk5MbZ2dnG/klmjA/P8/ExETfcfteODaCNKdMnglHj8OGtatG+rj91J2vURrHTGCuQYxjJhivXDMzM7szc6rbulrf5ZKZPwHeFhHnAF+NiIsy8+m2IdHtbl22swPYATA1NZXT09N1Hn4k5ubmqJPnhm0PDT9Mm60bTnDnvhUcvHZ6pI/bT935GqVxzATmGsQ4ZoLxzdVpoLNcMvO/gTlgU8eqQ8AFbbfPBw4vJZgkaTB1znI5r3plTkScCfwW8J2OYQ8A11dnu1wGHMvMI02HlST1VueQyxrgr6vj6K8BvpyZD0bETQCZuR3YCWwGDgCvADcOKa8kqYe+hZ6Ze4G3d1m+ve16Ajc3G02SNAg/KSpJhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5Jhehb6BFxQUQ8EhHPRsT+iPhglzHTEXEsIvZUl9uHE1eS1MuKGmNOAFsz88mIOBvYHRHfyMxnOsZ9KzOvaD6iJKmOvq/QM/NIZj5ZXf8B8CywdtjBJEmDicysPzhiHfAocFFmvty2fBq4HzgEHAZuzcz9Xe6/BdgCMDk5uXF2dnYJ0Zs1Pz/PxMRE33H7Xjg2gjSnTJ4JR4/DhrWrRvq4/dSdr1Eax0xgrkGMYyYYr1wzMzO7M3Oq27rahR4RE8A3gT/PzK90rHsd8NPMnI+IzcCnM3P9QtubmprKXbt21XrsUZibm2N6errvuHXbHhp+mDZbN5zgzn0rOHjHe0f6uP3Una9RGsdMYK5BjGMmGK9cEdGz0Gud5RIRK2m9Ar+vs8wBMvPlzJyvru8EVkbE6iVkliQNqM5ZLgF8Hng2Mz/VY8wbq3FExCXVdl9qMqgkaWF1znJ5B3AdsC8i9lTLPgK8CSAztwNXAR+IiBPAceDqHOTgvCRpyfoWemY+BkSfMXcDdzcVSpI0OD8pKkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRB9Cz0iLoiIRyLi2YjYHxEf7DImIuKuiDgQEXsj4uLhxJUk9b
KixpgTwNbMfDIizgZ2R8Q3MvOZtjGXA+ury6XAZ6ufkqQR6fsKPTOPZOaT1fUfAM8CazuGXQncmy2PA+dExJrG00qSeorMrD84Yh3wKHBRZr7ctvxB4I7MfKy6/TBwW2bu6rj/FmALwOTk5MbZ2dkl/wJNmZ+fZ2Jiou+4fS8cG0GaUybPhKPHR/qQp9mwdtVpy+rO1yiNYyYw1yDGMROMV66ZmZndmTnVbV2dQy4ARMQEcD/wofYyP7m6y11O+5ciM3cAOwCmpqZyenq67sMP3dzcHHXy3LDtoeGHabN1wwnu3Ff7aRqKg9dOn7as7nyN0jhmAnMNYhwzwfjm6lTrLJeIWEmrzO/LzK90GXIIuKDt9vnA4aXHkyTVVecslwA+DzybmZ/qMewB4PrqbJfLgGOZeaTBnJKkPur8Lf8O4DpgX0TsqZZ9BHgTQGZuB3YCm4EDwCvAjY0nlSQtqG+hV290djtG3j4mgZubCiVJGpyfFJWkQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQvQt9Ii4JyJejIine6yfjohjEbGnutzefExJUj8raoz5AnA3cO8CY76VmVc0kkiStCh9X6Fn5qPA90eQRZK0BJGZ/QdFrAMezMyLuqybBu4HDgGHgVszc3+P7WwBtgBMTk5unJ2dXWzuxs3PzzMxMdF33L4Xjo0gzSmTZ8LR4yN9yNNsWLvqtGV152uUxjETmGsQ45gJxivXzMzM7syc6rauiUJ/HfDTzJyPiM3ApzNzfb9tTk1N5a5du/o+9qjMzc0xPT3dd9y6bQ8NP0ybrRtOcOe+OkfGhufgHe89bVnd+RqlccwE5hrEOGaC8coVET0LfclnuWTmy5k5X13fCayMiNVL3a4kaTBLLvSIeGNERHX9kmqbLy11u5KkwfT9Wz4ivghMA6sj4hDwMWAlQGZuB64CPhARJ4DjwNVZ5ziOJKlRfQs9M6/ps/5uWqc1SpKWkZ8UlaRCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVIi+hR4R90TEixHxdI/1ERF3RcSBiNgbERc3H1OS1E+dV+hfADYtsP5yYH112QJ8dumxJEmD6lvomfko8P0FhlwJ3JstjwPnRMSapgJKkuqJzOw/KGId8GBmXtRl3YPAHZn5WHX7YeC2zNzVZewWWq/imZyc3Dg7O7uo0PteOLao+y1k8kw4erzxzS6ZueprMtOGtaua2RAwPz/PxMRErbHD2Ld7aZ+vJn/fpRhkrpZqkLluen9fynzPzMzszsypbutWLHqrp0SXZV3/lcjMHcAOgKmpqZyenl7UA96w7aFF3W8hWzec4M59TUxHs8xVX5OZDl473ch2AObm5qi7rw9j3+6lfb6a/H2XYpC5WqpB5rrp/X1Y893EWS6HgAvabp8PHG5gu5KkATRR6A8A11dnu1wGHMvMIw1sV5I0gL5/Q0TEF4FpYHVEHAI+BqwEyMztwE5gM3AAeAW4cVhhJUm99S30zLymz/oEbm4skSRpUfykqCQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVolahR8SmiHguIg5ExLYu66cj4lhE7KkutzcfVZK0kBX9BkTEGcBngHcDh4AnIuKBzHymY+i3MvOKIWSUJNVQ5xX6JcCBzH
w+M38MzAJXDjeWJGlQkZkLD4i4CtiUme+vbl8HXJqZt7SNmQbup/UK/jBwa2bu77KtLcAWgMnJyY2zs7OLCr3vhWO
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"car_sales[\"Odometer (KM)\"].hist()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "b30e787b",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "no numeric data to plot",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [36]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcar_sales\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPrice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_core.py:972\u001b[0m, in \u001b[0;36mPlotAccessor.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 969\u001b[0m label_name \u001b[38;5;241m=\u001b[39m label_kw \u001b[38;5;129;01mor\u001b[39;00m data\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[1;32m 970\u001b[0m data\u001b[38;5;241m.\u001b[39mcolumns \u001b[38;5;241m=\u001b[39m label_name\n\u001b[0;32m--> 972\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mplot_backend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_matplotlib/__init__.py:71\u001b[0m, in \u001b[0;36mplot\u001b[0;34m(data, kind, **kwargs)\u001b[0m\n\u001b[1;32m 69\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124max\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(ax, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft_ax\u001b[39m\u001b[38;5;124m\"\u001b[39m, ax)\n\u001b[1;32m 70\u001b[0m plot_obj \u001b[38;5;241m=\u001b[39m PLOT_CLASSES[kind](data, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 71\u001b[0m \u001b[43mplot_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 72\u001b[0m plot_obj\u001b[38;5;241m.\u001b[39mdraw()\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m plot_obj\u001b[38;5;241m.\u001b[39mresult\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_matplotlib/core.py:327\u001b[0m, in \u001b[0;36mMPLPlot.generate\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_args_adjust()\n\u001b[0;32m--> 327\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compute_plot_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_setup_subplots()\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_plot()\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_matplotlib/core.py:506\u001b[0m, in \u001b[0;36mMPLPlot._compute_plot_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[38;5;66;03m# no non-numeric frames or series allowed\u001b[39;00m\n\u001b[1;32m 505\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_empty:\n\u001b[0;32m--> 506\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mno numeric data to plot\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 508\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m=\u001b[39m numeric_data\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_convert_to_ndarray)\n",
"\u001b[0;31mTypeError\u001b[0m: no numeric data to plot"
]
}
],
"source": [
"car_sales[\"Price\"].plot()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "bb9b5864",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_4212/3108854531.py:1: FutureWarning: The default value of regex will change from True to False in a future version.\n",
" car_sales[\"Price\"] = car_sales[\"Price\"].str.replace('[\\$\\,\\.]','').astype(int)\n"
]
}
],
"source": [
"car_sales[\"Price\"] = car_sales[\"Price\"].str.replace('[\\$\\,\\.]','').astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "37128899",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 400000\n",
"1 500000\n",
"2 700000\n",
"3 2200000\n",
"4 350000\n",
"5 450000\n",
"6 750000\n",
"7 700000\n",
"8 625000\n",
"9 970000\n",
"Name: Price, dtype: int64"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Price\"]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "962db850",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEDCAYAAAA7jc+ZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAs90lEQVR4nO3deXxcd3nv8c+jzdpGkm1JI+/yIsmx49g4ip2FJQklccKSQoEmZIGQ4FtKKNxSWrYX9JZL6S2F0pYEExKTBLJQyAKlJg6UkISExHYSjW3F+zpjW6s92vd57h8zo8i2lpE0M2eW5/166SXNOWc0jybR10e/85zfT1QVY4wxqSvD6QKMMcbElgW9McakOAt6Y4xJcRb0xhiT4izojTEmxVnQG2NMikvYoBeRzSLSJCK7Izz+wyLyhojUi8gjsa7PGGOShSRqH72IvB3oBB5S1QsnOLYK+E/galU9IyLlqtoUjzqNMSbRJewZvao+D5weuU1ElorI0yLyqoi8ICLLQ7s+AdytqmdCz7WQN8aYkIQN+jHcC3xaVS8G/ga4J7S9GqgWkRdF5GUR2eBYhcYYk2CynC4gUiJSCFwO/ExEwptnhD5nAVXAlcB84AURuVBV/XEu0xhjEk7SBD3Bvz78qrpmlH0+4GVVHQCOiMg+gsG/PY71GWNMQkqaoRtVbScY4h8CkKDVod1PAVeFtpcSHMo57ESdxhiTaBI26EXkUeCPQI2I+ETkDuBm4A4R8QD1wA2hw7cCrSLyBvAs8HlVbXWibmOMSTQJ215pjDEmOhL2jN4YY0x0JOTF2NLSUq2srHS6DGOMSRqvvvpqi6qWjbYvIYO+srKSHTt2OF2GMcYkDRE5Nta+CYduRGSBiDwrIntC88h8ZpRjbhaRnaGPl0Z0wyAiR0Vkl4jUiYiltzHGxFkkZ/SDwOdU9TURcQGvishvVPWNEcccAd4RmmfmOoJ3sK4fsf8qVW2JXtnGGGMiNWHQq+op4FTo6w4R2QPMA94YccxLI57yMsG7U40xxiSASXXdiEgl8BbglXEOuwP49YjHCjwTmohs4zjfe6OI7BCRHc3NzZMpyxhjzDgivhgbmmvmceCzobtURzvmKoJB/9YRm69Q1ZMiUg78RkT2hmamPIuq3ktwyIfa2lpr7jfGmCiJ6IxeRLIJhvzDqvrEGMdcBNwH3DDyrlRVPRn63AQ8CaybbtHGGGMiF0nXjQD3A3tU9TtjHLMQeAK4VVX3j9heELqAi4gUANcAEa0YZYwxJjoiGbq5ArgV2CUidaFtXwIWAqjqJuCrwGzgntAUwoOqWgu4gSdD27KAR1T16Wj+ACaxnWrrweP1s+HCOU6XYkzaiqTr5g+ATHDMncCdo2w/DKw+/xkmXfzgucM88NJRdv79NRTlZjtdjjFpyea6MTH1utcPwIHGTmcLMSaNWdCbmOkbHGLPyWCD1v7GDoerMSZ9WdCbmNl7qoP+oQAA+xos6I1xigW9iRmPzw9ARVGundEb4yALehMzdV4/pYUzeFtVKfttjN4Yx1jQm5jxeP2sWVBMTYWLls4+Wjv7nC7JmLRkQW9ior13gEPNXayeX0K12wVgZ/XGOMSC3sTELl8bAKsXlFBTEQ56G6c3xgkJucKUSX51of751fNLKMrLojgvm30W9MY4woLexITH62dJaQHF+cG7YavdhRywoDfGETZ0Y2LC4/OzekHJ8ONqt4t9DR2o2gzUxsSbBb2Juoa2Xhrb+1g9v3h4W02Fi/beQRrbrfPGmHizoDdRNzw+f84ZPWDj9MY4wILeRJ3H5yc7U7hgTtHwtuEWS5sKwZi4s6A3Uefx+rlgThG52ZnD22YV5FBaOMNaLI1xgAW9iapAQNnpa2P1/JLz9tVUFFrQG+MAC3oTVYdbOunsGzxrfD6s2u1if2MngYB13hgTT5GsGbtARJ4VkT0iUi8inxnlGBGRfxeRgyKyU0TWjti3QUT2hfZ9Ido/gE
ksdd7gHbFrFhSft6/G7aJnYAjfmZ54l2VMWovkjH4Q+JyqXgBcCnxKRFacc8x1QFXoYyPwfQARyQTuDu1fAdw0ynNNCvF4/RTOyGJJaeF5+6ptKgRjHDFh0KvqKVV9LfR1B7AHmHfOYTcAD2nQy0CJiMwB1gEHVfWwqvYDj4WONSnK4/Nz0fxiMjLOX2a4qjwY/tZiaUx8TWqMXkQqgbcAr5yzax7gHfHYF9o21naTgnoHhthzqn3U8XkAV24280ry7IzemDiLOOhFpBB4HPisqrafu3uUp+g420f7/htFZIeI7Ghubo60LJNA9pxqZ2BIR+24Cat2F9qygsbEWURBLyLZBEP+YVV9YpRDfMCCEY/nAyfH2X4eVb1XVWtVtbasrCySskyC8YTuiF0zxhk9BMfpDzd3MRhaS9YYE3uRdN0IcD+wR1W/M8ZhvwRuC3XfXAq0qeopYDtQJSKLRSQHuDF0rElBHl8b7qIZVBTnjnlMjdtF/1CAo63dcazMmPQWyTTFVwC3ArtEpC607UvAQgBV3QRsAa4HDgLdwO2hfYMichewFcgENqtqfTR/AJM4PF7/uMM2MGIqhMYOlpWf35ljjIm+CYNeVf/A6GPtI49R4FNj7NtC8B8Ck8Laugc43NLFn108f9zjlpUXIgL7Gjq4ftWcOFVnTHqzO2NNVOw84QfGH58HyM3OpHJ2gXXeGBNHFvQmKsIXYlfNP/+O2HNVuwutl96YOLKgN1FR521jaVkBRbnZEx5b43ZxrLWb3oGhOFRmjLGgN9OmqtR5/WPeKHWuKreLoYByuLkrtoUZYwALehMFp9p6aensm3B8PqzG5rwxJq4s6M20hcfnJ2qtDKucXUB2ptg4vTFxYkFvpq3O5ycnM4Plc1wRHZ+TlcGS0kJbVtCYOLGgN9Pm8fq5YG4RM7IyJz44pLrCxf4mC3pj4sGC3kzLUEDZ5WtjTQRtlSNVlxfiPd1DV99gjCozxoRZ0JtpOdTcSVf/UMQdN2HhRUgONHXGoCpjzEgW9GZa6sIXYicZ9DXhOW9snN6YmLOgN9Pi8fpx5WaxeHbBpJ63YFY+udkZ1nljTBxY0Jtp8fiCM1aOtnTgeDIzhKpyl/XSGxMHFvRmynoHhth7qoPVCyZ3ITasyl1oQW9MHFjQmymrP9nOYGD8pQPHU+N20djeh7+7P7qFGWPOYkFvpiySpQPHUz08FYJ13hgTSxb0Zso8Pj9zinMpLxp76cDxhDtv7IKsMbFlQW+mLJKlA8czpzgX14wsDljQGxNTkSwOvllEmkRk9xj7Py8idaGP3SIyJCKzQvuOisiu0L4d0S7eOMff3c/R1u5J98+PJCJUuQvZZ730xsRUJGf0DwAbxtqpqt9S1TWqugb4IvCcqp4ecchVof2106rUJBSPrw1gyh03YTUVwRbL4LLDxphYmDDoVfV54PREx4XcBDw6rYpMUqg77kcEVs2bXtBXu12c6R6gubMvSpUZY84VtTF6EckneOb/+IjNCjwjIq+KyMYJnr9RRHaIyI7m5uZolWVixOPzs6ysEFcESweO582pEKzzxphYiebF2PcCL54zbHOFqq4FrgM+JSJvH+vJqnqvqtaqam1ZWVkUyzLRpqrBC7HTGJ8Pq7bVpoyJuWgG/Y2cM2yjqidDn5uAJ4F1UXw94xDfmR5au/qjEvSlhTOYVZBjQW9MDEUl6EWkGHgH8IsR2wpExBX+GrgGGLVzxyQXj88PwJpptFaOVO0utF56Y2Ioa6IDRORR4EqgVER8wNeAbABV3RQ67P3AM6raNeKpbuBJEQm/ziOq+nT0SjdO8Xj95GRlDC/yPV01bhc/f9WHqhL6/8UYE0UTBr2q3hTBMQ8QbMMcue0wsHqqhZnE5fG2sXJuETlZ0Rn5q65w0dU/xAl/D/Nn5kflexpj3mR3xppJGRwKsOtE27TuiD1XuPPmgM15Y0xMWNCbSTnQ1EnPwNCUJzIbTZXNeWNMTFnQm0nxTHHpwP
EU52VTUZRrywoaEyMW9GZSPD4/RblZVM6O7lh6dYXLzuiNiRELejMpdd42Vi8oiXp3TI27kINNnQwFbM4bY6LNgt5
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"car_sales[\"Price\"].plot()"
]
},
{
"cell_type": "markdown",
"id": "4413ac95",
"metadata": {},
"source": [
"## Manipulating Data"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "69eba555",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 toyota\n",
"1 honda\n",
"2 toyota\n",
"3 bmw\n",
"4 nissan\n",
"5 toyota\n",
"6 honda\n",
"7 honda\n",
"8 toyota\n",
"9 nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Make\"].str.lower()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "ad6ee6b8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 400000\n",
"1 Honda Red 87899 4 500000\n",
"2 Toyota Blue 32549 3 700000\n",
"3 BMW Black 11179 5 2200000\n",
"4 Nissan White 213095 4 350000\n",
"5 Toyota Green 99213 4 450000\n",
"6 Honda Blue 45698 4 750000\n",
"7 Honda Blue 54738 4 700000\n",
"8 Toyota White 60000 4 625000\n",
"9 Nissan White 31600 4 970000"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "cbbf9d5b",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Make\"] = car_sales[\"Make\"].str.lower()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "98b177e9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 toyota White 150043 4 400000\n",
"1 honda Red 87899 4 500000\n",
"2 toyota Blue 32549 3 700000\n",
"3 bmw Black 11179 5 2200000\n",
"4 nissan White 213095 4 350000\n",
"5 toyota Green 99213 4 450000\n",
"6 honda Blue 45698 4 750000\n",
"7 honda Blue 54738 4 700000\n",
"8 toyota White 60000 4 625000\n",
"9 nissan White 31600 4 970000"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "65b2f115",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 toyota White 150043 4 400000\n",
"1 honda Red 87899 4 500000\n",
"2 toyota Blue 32549 3 700000\n",
"3 bmw Black 11179 5 2200000\n",
"4 nissan White 213095 4 350000\n",
"5 toyota Green 99213 4 450000\n",
"6 honda Blue 45698 4 750000\n",
"7 honda Blue 54738 4 700000\n",
"8 toyota White 60000 4 625000\n",
"9 nissan White 31600 4 970000"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "bd67bc4a",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing = pd.read_csv(\"car-sales-missing-data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "63f67964",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.0</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"2 Toyota Blue NaN 3.0 $7,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500\n",
"5 Toyota Green NaN 4.0 $4,500\n",
"6 Honda NaN NaN 4.0 $7,500\n",
"7 Honda Blue NaN 4.0 NaN\n",
"8 Toyota White 60000.0 NaN NaN\n",
"9 NaN White 31600.0 4.0 $9,700"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "97464074",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"92302.66666666667"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing[\"Odometer\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "8ba91fcd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 150043.000000\n",
"1 87899.000000\n",
"2 92302.666667\n",
"3 11179.000000\n",
"4 213095.000000\n",
"5 92302.666667\n",
"6 92302.666667\n",
"7 92302.666667\n",
"8 60000.000000\n",
"9 31600.000000\n",
"Name: Odometer, dtype: float64"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing[\"Odometer\"].fillna(car_sales_missing[\"Odometer\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "4b053d9f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.0</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"2 Toyota Blue NaN 3.0 $7,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500\n",
"5 Toyota Green NaN 4.0 $4,500\n",
"6 Honda NaN NaN 4.0 $7,500\n",
"7 Honda Blue NaN 4.0 NaN\n",
"8 Toyota White 60000.0 NaN NaN\n",
"9 NaN White 31600.0 4.0 $9,700"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "8bc146ad",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing[\"Odometer\"].fillna(car_sales_missing[\"Odometer\"].mean(),\n",
" inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "8f6a17f8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.000000</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500\n",
"6 Honda NaN 92302.666667 4.0 $7,500\n",
"7 Honda Blue 92302.666667 4.0 NaN\n",
"8 Toyota White 60000.000000 NaN NaN\n",
"9 NaN White 31600.000000 4.0 $9,700"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "7059b936",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "36e52a55",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.000000</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500\n",
"6 Honda NaN 92302.666667 4.0 $7,500\n",
"7 Honda Blue 92302.666667 4.0 NaN\n",
"8 Toyota White 60000.000000 NaN NaN\n",
"9 NaN White 31600.000000 4.0 $9,700"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "48a80b90",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "23c87cea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "07ef69f1",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing = pd.read_csv(\"car-sales-missing-data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "1392f357",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.0</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"2 Toyota Blue NaN 3.0 $7,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500\n",
"5 Toyota Green NaN 4.0 $4,500\n",
"6 Honda NaN NaN 4.0 $7,500\n",
"7 Honda Blue NaN 4.0 NaN\n",
"8 Toyota White 60000.0 NaN NaN\n",
"9 NaN White 31600.0 4.0 $9,700"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "2c141c98",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing_dropped = car_sales_missing.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "620ef1af",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing_dropped"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "781e9bf0",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing_dropped.to_csv(\"car-sales-missing-dropped.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "9bccd7ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats\n",
"0 toyota White 150043 4 400000 5.0\n",
"1 honda Red 87899 4 500000 5.0\n",
"2 toyota Blue 32549 3 700000 5.0\n",
"3 bmw Black 11179 5 2200000 5.0\n",
"4 nissan White 213095 4 350000 5.0\n",
"5 toyota Green 99213 4 450000 NaN\n",
"6 honda Blue 45698 4 750000 NaN\n",
"7 honda Blue 54738 4 700000 NaN\n",
"8 toyota White 60000 4 625000 NaN\n",
"9 nissan White 31600 4 970000 NaN"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column from series\n",
"seats_column = pd.Series([5, 5, 5, 5, 5,])\n",
"\n",
"# New column called seats\n",
"car_sales[\"Seats\"] = seats_column\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "c4f2c1e4",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Seats\"].fillna(5, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "8de131a3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats\n",
"0 toyota White 150043 4 400000 5.0\n",
"1 honda Red 87899 4 500000 5.0\n",
"2 toyota Blue 32549 3 700000 5.0\n",
"3 bmw Black 11179 5 2200000 5.0\n",
"4 nissan White 213095 4 350000 5.0\n",
"5 toyota Green 99213 4 450000 5.0\n",
"6 honda Blue 45698 4 750000 5.0\n",
"7 honda Blue 54738 4 700000 5.0\n",
"8 toyota White 60000 4 625000 5.0\n",
"9 nissan White 31600 4 970000 5.0"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "8b823fad",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM\n",
"0 toyota White 150043 4 400000 5.0 7.5\n",
"1 honda Red 87899 4 500000 5.0 9.2\n",
"2 toyota Blue 32549 3 700000 5.0 5.0\n",
"3 bmw Black 11179 5 2200000 5.0 9.6\n",
"4 nissan White 213095 4 350000 5.0 8.7\n",
"5 toyota Green 99213 4 450000 5.0 4.7\n",
"6 honda Blue 45698 4 750000 5.0 7.6\n",
"7 honda Blue 54738 4 700000 5.0 8.7\n",
"8 toyota White 60000 4 625000 5.0 3.0\n",
"9 nissan White 31600 4 970000 5.0 4.5"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column from Python list\n",
"fuel_economy = [7.5, 9.2, 5.0, 9.6, 8.7, 4.7, 7.6, 8.7, 3.0, 4.5]\n",
"car_sales[\"Fuel per 100KM\"] = fuel_economy\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "9a181add",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Total fuel used (L)\"] = car_sales[\"Odometer (KM)\"]/100 * car_sales[\"Fuel per 100KM\"]"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "20131395",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used</th>\n",
" <th>Total fuel used (L)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>11253.225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>8086.708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>1627.450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>1073.184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>18539.265</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4663.011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>3473.048</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4762.206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>1800.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>1422.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"4 nissan White 213095 4 350000 5.0 8.7 \n",
"5 toyota Green 99213 4 450000 5.0 4.7 \n",
"6 honda Blue 45698 4 750000 5.0 7.6 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"\n",
" Total fuel used Total fuel used (L) \n",
"0 11253.225 11253.225 \n",
"1 8086.708 8086.708 \n",
"2 1627.450 1627.450 \n",
"3 1073.184 1073.184 \n",
"4 18539.265 18539.265 \n",
"5 4663.011 4663.011 \n",
"6 3473.048 3473.048 \n",
"7 4762.206 4762.206 \n",
"8 1800.000 1800.000 \n",
"9 1422.000 1422.000 "
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "d9f81e85",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"4 nissan White 213095 4 350000 5.0 8.7 \n",
"5 toyota Green 99213 4 450000 5.0 4.7 \n",
"6 honda Blue 45698 4 750000 5.0 7.6 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"\n",
" Total fuel used Total fuel used (L) Number of wheels \n",
"0 11253.225 11253.225 4 \n",
"1 8086.708 8086.708 4 \n",
"2 1627.450 1627.450 4 \n",
"3 1073.184 1073.184 4 \n",
"4 18539.265 18539.265 4 \n",
"5 4663.011 4663.011 4 \n",
"6 3473.048 3473.048 4 \n",
"7 4762.206 4762.206 4 \n",
"8 1800.000 1800.000 4 \n",
"9 1422.000 1422.000 4 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a column from a single value\n",
"car_sales[\"Number of wheels\"] = 4\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "6859af53",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Passed road safety\"] = True"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "a289dd85",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make object\n",
"Colour object\n",
"Odometer (KM) int64\n",
"Doors int64\n",
"Price int64\n",
"Seats float64\n",
"Fuel per 100KM float64\n",
"Total fuel used float64\n",
"Total fuel used (L) float64\n",
"Number of wheels int64\n",
"Passed road safety bool\n",
"dtype: object"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "a7c11bc0",
"metadata": {},
"outputs": [],
"source": [
"# \"Total fuel used\" holds the same values as \"Total fuel used (L)\", so keep only the labelled one.\n",
"car_sales = car_sales.drop(columns=[\"Total fuel used\"])"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "15d32814",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"4 nissan White 213095 4 350000 5.0 8.7 \n",
"5 toyota Green 99213 4 450000 5.0 4.7 \n",
"6 honda Blue 45698 4 750000 5.0 7.6 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 11253.225 4 True \n",
"1 8086.708 4 True \n",
"2 1627.450 4 True \n",
"3 1073.184 4 True \n",
"4 18539.265 4 True \n",
"5 4663.011 4 True \n",
"6 3473.048 4 True \n",
"7 4762.206 4 True \n",
"8 1800.000 4 True \n",
"9 1422.000 4 True "
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "34c60bf4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"1 8086.708 4 True \n",
"9 1422.000 4 True \n",
"7 4762.206 4 True \n",
"2 1627.450 4 True \n",
"3 1073.184 4 True "
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview a random half of the rows; random_state is fixed so the same rows are shown on every run.\n",
"car_sales.sample(frac=0.5, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "6ae2e87d",
"metadata": {},
"outputs": [],
"source": [
"# Shuffle every row (frac=1 draws 100% of the rows in random order).\n",
"# random_state is fixed so the shuffled order, and everything derived from it, is reproducible.\n",
"car_sales_shuffled = car_sales.sample(frac=1, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "46c3d98c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 11253.225 4 True \n",
"8 1800.000 4 True "
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Draw 20% of the shuffled rows; random_state is fixed so the same rows are shown on every run.\n",
"car_sales_shuffled.sample(frac=0.2, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "bf846d5d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 nissan White 213095 4 350000 5.0 8.7 \n",
"1 honda Blue 54738 4 700000 5.0 8.7 \n",
"2 nissan White 31600 4 970000 5.0 4.5 \n",
"3 toyota Blue 32549 3 700000 5.0 5.0 \n",
"4 honda Blue 45698 4 750000 5.0 7.6 \n",
"5 toyota White 60000 4 625000 5.0 3.0 \n",
"6 honda Red 87899 4 500000 5.0 9.2 \n",
"7 bmw Black 11179 5 2200000 5.0 9.6 \n",
"8 toyota White 150043 4 400000 5.0 7.5 \n",
"9 toyota Green 99213 4 450000 5.0 4.7 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 18539.265 4 True \n",
"1 4762.206 4 True \n",
"2 1422.000 4 True \n",
"3 1627.450 4 True \n",
"4 3473.048 4 True \n",
"5 1800.000 4 True \n",
"6 8086.708 4 True \n",
"7 1073.184 4 True \n",
"8 11253.225 4 True \n",
"9 4663.011 4 True "
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales = car_sales_shuffled.reset_index(drop=True)\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "b9220ca6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 nissan White 213095 4 350000 5.0 8.7 \n",
"1 honda Blue 54738 4 700000 5.0 8.7 \n",
"2 nissan White 31600 4 970000 5.0 4.5 \n",
"3 toyota Blue 32549 3 700000 5.0 5.0 \n",
"4 honda Blue 45698 4 750000 5.0 7.6 \n",
"5 toyota White 60000 4 625000 5.0 3.0 \n",
"6 honda Red 87899 4 500000 5.0 9.2 \n",
"7 bmw Black 11179 5 2200000 5.0 9.6 \n",
"8 toyota White 150043 4 400000 5.0 7.5 \n",
"9 toyota Green 99213 4 450000 5.0 4.7 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 18539.265 4 True \n",
"1 4762.206 4 True \n",
"2 1422.000 4 True \n",
"3 1627.450 4 True \n",
"4 3473.048 4 True \n",
"5 1800.000 4 True \n",
"6 8086.708 4 True \n",
"7 1073.184 4 True \n",
"8 11253.225 4 True \n",
"9 4663.011 4 True "
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "bdcc6512",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>133184.375</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>34211.250</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>19750.000</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>20343.125</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>28561.250</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>37500.000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>54936.875</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>6986.875</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>93776.875</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>62008.125</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 nissan White 133184.375 4 350000 5.0 8.7 \n",
"1 honda Blue 34211.250 4 700000 5.0 8.7 \n",
"2 nissan White 19750.000 4 970000 5.0 4.5 \n",
"3 toyota Blue 20343.125 3 700000 5.0 5.0 \n",
"4 honda Blue 28561.250 4 750000 5.0 7.6 \n",
"5 toyota White 37500.000 4 625000 5.0 3.0 \n",
"6 honda Red 54936.875 4 500000 5.0 9.2 \n",
"7 bmw Black 6986.875 5 2200000 5.0 9.6 \n",
"8 toyota White 93776.875 4 400000 5.0 7.5 \n",
"9 toyota Green 62008.125 4 450000 5.0 4.7 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 18539.265 4 True \n",
"1 4762.206 4 True \n",
"2 1422.000 4 True \n",
"3 1627.450 4 True \n",
"4 3473.048 4 True \n",
"5 1800.000 4 True \n",
"6 8086.708 4 True \n",
"7 1073.184 4 True \n",
"8 11253.225 4 True \n",
"9 4663.011 4 True "
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Divide the odometer readings by 1.6 (approx. kilometres per mile), presumably to express them in miles.\n",
"# NOTE(review): the column label still says \"(KM)\" after this step, and re-running the cell divides\n",
"# the values again -- confirm whether a separate, correctly labelled miles column is wanted.\n",
"# Series arithmetic is vectorised, so an element-wise .apply(lambda ...) is not needed here.\n",
"car_sales[\"Odometer (KM)\"] = car_sales[\"Odometer (KM)\"] / 1.6\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d3da940",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}