ml-course/sample_project/introduction-to-pandas.ipynb

5915 lines
208 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "d0b99ebe",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "18b71e0d",
"metadata": {},
"outputs": [],
"source": [
"# pandas has two main data types: Series and DataFrame\n",
"series = pd.Series([\"BMW\", \"Toyota\", \"Honda\"])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5fd5c8ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 BMW\n",
"1 Toyota\n",
"2 Honda\n",
"dtype: object"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8293df83",
"metadata": {},
"outputs": [],
"source": [
"# A Series is 1-dimensional"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7ce01316",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Red\n",
"1 Blue\n",
"2 White\n",
"dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"colours = pd.Series([\"Red\", \"Blue\", \"White\"])\n",
"colours"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3df244ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Car make</th>\n",
" <th>Colour</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BMW</td>\n",
" <td>Red</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Honda</td>\n",
" <td>White</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Car make Colour\n",
"0 BMW Red\n",
"1 Toyota Blue\n",
"2 Honda White"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A DataFrame is 2-dimensional: here each Series becomes one named column\n",
"car_data = pd.DataFrame({\"Car make\": series, \"Colour\": colours})\n",
"car_data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6269eb7",
"metadata": {},
"outputs": [],
"source": [
"# Import data\n",
"car_sales = pd.read_csv(\"car-sales.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5ed55160",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "118c9363",
"metadata": {},
"outputs": [],
"source": [
"# Exporting a dataframe\n",
"# car_sales.to_csv(\"exported.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"id": "0be3c88e",
"metadata": {},
"source": [
"## Describe data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f6ae0796",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make object\n",
"Colour object\n",
"Odometer (KM) int64\n",
"Doors int64\n",
"Price object\n",
"dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attribute: accessed without parentheses\n",
"car_sales.dtypes\n",
"\n",
"# Function (method): called with parentheses\n",
"# car_sales.to_csv()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d55320ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.columns"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "92b983d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_columns = car_sales.columns\n",
"car_columns"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "92937e49",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=10, step=1)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.index"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "922a7259",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "f46a652c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>10.000000</td>\n",
" <td>10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>78601.400000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>61983.471735</td>\n",
" <td>0.471405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>11179.000000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>35836.250000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>57369.000000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>96384.500000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>213095.000000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Odometer (KM) Doors\n",
"count 10.000000 10.000000\n",
"mean 78601.400000 4.000000\n",
"std 61983.471735 0.471405\n",
"min 11179.000000 3.000000\n",
"25% 35836.250000 4.000000\n",
"50% 57369.000000 4.000000\n",
"75% 96384.500000 4.000000\n",
"max 213095.000000 5.000000"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.describe()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "79387319",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10 entries, 0 to 9\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Make 10 non-null object\n",
" 1 Colour 10 non-null object\n",
" 2 Odometer (KM) 10 non-null int64 \n",
" 3 Doors 10 non-null int64 \n",
" 4 Price 10 non-null object\n",
"dtypes: int64(2), object(3)\n",
"memory usage: 528.0+ bytes\n"
]
}
],
"source": [
"car_sales.info()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "cbfd8da3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_18146/4073448239.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" car_sales.mean()\n"
]
},
{
"data": {
"text/plain": [
"Odometer (KM) 78601.4\n",
"Doors 4.0\n",
"dtype: float64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.mean()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "73ea13e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"376500.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_prices = pd.Series([3000, 1500, 1125000])\n",
"car_prices.mean()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "8b05884d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make ToyotaHondaToyotaBMWNissanToyotaHondaHondaToyo...\n",
"Colour WhiteRedBlueBlackWhiteGreenBlueBlueWhiteWhite\n",
"Odometer (KM) 786014\n",
"Doors 40\n",
"Price $4,000.00$5,000.00$7,000.00$22,000.00$3,500.00...\n",
"dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.sum()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "4ddbed66",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"40"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Doors\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "0fdb1df3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(car_sales)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "72af5003",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "markdown",
"id": "33cfa487",
"metadata": {},
"source": [
"## Viewing and selecting data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "23567f48",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "fe1ea0d4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00\n",
"4 Nissan White 213095 4 $3,500.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.head(7)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "a05981ea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>$7,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>$9,700.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"6 Honda Blue 45698 4 $7,500.00\n",
"7 Honda Blue 54738 4 $7,000.00\n",
"8 Toyota White 60000 4 $6,250.00\n",
"9 Nissan White 31600 4 $9,700.00"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.tail()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d3f41528",
"metadata": {},
"outputs": [],
"source": [
"# .loc & .iloc\n",
"animals = pd.Series([\"cat\", \"dog\", \"bird\", \"panda\", \"snake\"],\n",
" index=[0,3, 9, 8, 3])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "b849ece1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 cat\n",
"3 dog\n",
"9 bird\n",
"8 panda\n",
"3 snake\n",
"dtype: object"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7aaabb07",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3 dog\n",
"3 snake\n",
"dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The label 3 appears twice in the index, so .loc returns both matching rows\n",
"animals.loc[3]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d3305a05",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'bird'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals.loc[9]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bc0c43a5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make BMW\n",
"Colour Black\n",
"Odometer (KM) 11179\n",
"Doors 5\n",
"Price $22,000.00\n",
"Name: 3, dtype: object"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .loc selects by index label\n",
"car_sales.loc[3]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "c0600348",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'panda'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .iloc selects by integer position (0-based), regardless of the index labels\n",
"animals.iloc[3]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f77b2a57",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 cat\n",
"3 dog\n",
"9 bird\n",
"8 panda\n",
"3 snake\n",
"dtype: object"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"animals"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "99050e3c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 cat\n",
"3 dog\n",
"9 bird\n",
"dtype: object"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .iloc slicing is position-based and excludes the end position (first 3 rows)\n",
"animals.iloc[:3]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "a9e018ad",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .loc slicing is label-based and includes the end label (rows 0 through 3)\n",
"car_sales.loc[:3]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "cedd32fb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>$5,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>$22,000.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"1 Honda Red 87899 4 $5,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"3 BMW Black 11179 5 $22,000.00"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "1d2be05c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Toyota\n",
"1 Honda\n",
"2 Toyota\n",
"3 BMW\n",
"4 Nissan\n",
"5 Toyota\n",
"6 Honda\n",
"7 Honda\n",
"8 Toyota\n",
"9 Nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Make\"]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "4962a1fc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 White\n",
"1 Red\n",
"2 Blue\n",
"3 Black\n",
"4 White\n",
"5 Green\n",
"6 Blue\n",
"7 Blue\n",
"8 White\n",
"9 White\n",
"Name: Colour, dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Colour\"]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "d4043529",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Toyota\n",
"1 Honda\n",
"2 Toyota\n",
"3 BMW\n",
"4 Nissan\n",
"5 Toyota\n",
"6 Honda\n",
"7 Honda\n",
"8 Toyota\n",
"9 Nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Make\"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7acbc784",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Toyota\n",
"1 Honda\n",
"2 Toyota\n",
"3 BMW\n",
"4 Nissan\n",
"5 Toyota\n",
"6 Honda\n",
"7 Honda\n",
"8 Toyota\n",
"9 Nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.Make"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "f6d2bca3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>$7,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>$4,500.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>$6,250.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"2 Toyota Blue 32549 3 $7,000.00\n",
"5 Toyota Green 99213 4 $4,500.00\n",
"8 Toyota White 60000 4 $6,250.00"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[car_sales[\"Make\"] == \"Toyota\"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "a9bbcefc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>$4,000.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>$3,500.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 $4,000.00\n",
"4 Nissan White 213095 4 $3,500.00"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows whose odometer reading is above 100,000 km.\n",
"car_sales.loc[car_sales[\"Odometer (KM)\"].gt(100_000)]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "b2a8ee80",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Doors</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Make</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>BMW</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Honda</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nissan</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Toyota</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Doors 3 4 5\n",
"Make \n",
"BMW 0 0 1\n",
"Honda 0 3 0\n",
"Nissan 0 2 0\n",
"Toyota 1 3 0"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the cars for every (make, number of doors) combination.\n",
"pd.crosstab(index=car_sales[\"Make\"], columns=car_sales[\"Doors\"])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "aa0d76c3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Make</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>BMW</th>\n",
" <td>11179.000000</td>\n",
" <td>5.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Honda</th>\n",
" <td>62778.333333</td>\n",
" <td>4.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nissan</th>\n",
" <td>122347.500000</td>\n",
" <td>4.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Toyota</th>\n",
" <td>85451.250000</td>\n",
" <td>3.75</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Odometer (KM) Doors\n",
"Make \n",
"BMW 11179.000000 5.00\n",
"Honda 62778.333333 4.00\n",
"Nissan 122347.500000 4.00\n",
"Toyota 85451.250000 3.75"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Groupby: average the numeric columns for each make.\n",
"# numeric_only=True skips the text columns (Colour, and Price while it is still a string);\n",
"# recent pandas versions raise a TypeError on them instead of silently dropping them.\n",
"car_sales.groupby([\"Make\"]).mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "52390ea4",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "e7fdeb36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"car_sales[\"Odometer (KM)\"].plot()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "fc72e078",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAATAklEQVR4nO3dcayddX3H8fdX2k3GxYKWXZuCFrduiaOJ0htgcZp7l+lKZSPb+ANCQNhMg4NEt5JQNUH3xxJ0wUyCsXaROBbm1QXdCNQ4Q7giW5i0rLQUZHasyUq7EjErXml01e/+OE/Xw+k59zzn3uece/zt/UpO7jnP8zvP+dzfefj03Oc85xCZiSTpZ99rljuAJKkZFrokFcJCl6RCWOiSVAgLXZIKsWK5Hnj16tW5bt265Xr40/zwhz/krLPOWu4YpzFXfeOYCcw1iHHMBOOVa/fu3d/LzPO6rszMZbls3Lgxx8kjjzyy3BG6Mld945gp01yDGMdMmeOVC9iVPXrVQy6SVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEH0LPSJeGxHfjoinImJ/RPxZlzEREXdFxIGI2BsRFw8nriSplzofLPoR8JuZOR8RK4HHIuJrmfl425jLgfXV5VLgs9VPSdKI9H2FXp3LPl/dXFldOr9E/Urg3mrs48A5EbGm2aiSpIVE1vgfXETEGcBu4JeBz2TmbR3rHwTuyMzHqtsPA7dl5q6OcVuALQCTk5MbZ2dnG/klmjA/P8/ExETfcfteODaCNKdMnglHj8OGtatG+rj91J2vURrHTGCuQYxjJhivXDMzM7szc6rbulrf5ZKZPwHeFhHnAF+NiIsy8+m2IdHtbl22swPYATA1NZXT09N1Hn4k5ubmqJPnhm0PDT9Mm60bTnDnvhUcvHZ6pI/bT935GqVxzATmGsQ4ZoLxzdVpoLNcMvO/gTlgU8eqQ8AFbbfPBw4vJZgkaTB1znI5r3plTkScCfwW8J2OYQ8A11dnu1wGHMvMI02HlST1VueQyxrgr6vj6K8BvpyZD0bETQCZuR3YCWwGDgCvADcOKa8kqYe+hZ6Ze4G3d1m+ve16Ajc3G02SNAg/KSpJhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5Jhehb6BFxQUQ8EhHPRsT+iPhglzHTEXEsIvZUl9uHE1eS1MuKGmNOAFsz88mIOBvYHRHfyMxnOsZ9KzOvaD6iJKmOvq/QM/NIZj5ZXf8B8CywdtjBJEmDicysPzhiHfAocFFmvty2fBq4HzgEHAZuzcz9Xe6/BdgCMDk5uXF2dnYJ0Zs1Pz/PxMRE33H7Xjg2gjSnTJ4JR4/DhrWrRvq4/dSdr1Eax0xgrkGMYyYYr1wzMzO7M3Oq27rahR4RE8A3gT/PzK90rHsd8NPMnI+IzcCnM3P9QtubmprKXbt21XrsUZibm2N6errvuHXbHhp+mDZbN5zgzn0rOHjHe0f6uP3Una9RGsdMYK5BjGMmGK9cEdGz0Gud5RIRK2m9Ar+vs8wBMvPlzJyvru8EVkbE6iVkliQNqM5ZLgF8Hng2Mz/VY8wbq3FExCXVdl9qMqgkaWF1znJ5B3AdsC8i9lTLPgK8CSAztwNXAR+IiBPAceDqHOTgvCRpyfoWemY+BkSfMXcDdzcVSpI0OD8pKkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRB9Cz0iLoiIRyLi2YjYHxEf7DImIuKuiDgQEXsj4uLhxJUk9b
KixpgTwNbMfDIizgZ2R8Q3MvOZtjGXA+ury6XAZ6ufkqQR6fsKPTOPZOaT1fUfAM8CazuGXQncmy2PA+dExJrG00qSeorMrD84Yh3wKHBRZr7ctvxB4I7MfKy6/TBwW2bu6rj/FmALwOTk5MbZ2dkl/wJNmZ+fZ2Jiou+4fS8cG0GaUybPhKPHR/qQp9mwdtVpy+rO1yiNYyYw1yDGMROMV66ZmZndmTnVbV2dQy4ARMQEcD/wofYyP7m6y11O+5ciM3cAOwCmpqZyenq67sMP3dzcHHXy3LDtoeGHabN1wwnu3Ff7aRqKg9dOn7as7nyN0jhmAnMNYhwzwfjm6lTrLJeIWEmrzO/LzK90GXIIuKDt9vnA4aXHkyTVVecslwA+DzybmZ/qMewB4PrqbJfLgGOZeaTBnJKkPur8Lf8O4DpgX0TsqZZ9BHgTQGZuB3YCm4EDwCvAjY0nlSQtqG+hV290djtG3j4mgZubCiVJGpyfFJWkQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQvQt9Ii4JyJejIine6yfjohjEbGnutzefExJUj8raoz5AnA3cO8CY76VmVc0kkiStCh9X6Fn5qPA90eQRZK0BJGZ/QdFrAMezMyLuqybBu4HDgGHgVszc3+P7WwBtgBMTk5unJ2dXWzuxs3PzzMxMdF33L4Xjo0gzSmTZ8LR4yN9yNNsWLvqtGV152uUxjETmGsQ45gJxivXzMzM7syc6rauiUJ/HfDTzJyPiM3ApzNzfb9tTk1N5a5du/o+9qjMzc0xPT3dd9y6bQ8NP0ybrRtOcOe+OkfGhufgHe89bVnd+RqlccwE5hrEOGaC8coVET0LfclnuWTmy5k5X13fCayMiNVL3a4kaTBLLvSIeGNERHX9kmqbLy11u5KkwfT9Wz4ivghMA6sj4hDwMWAlQGZuB64CPhARJ4DjwNVZ5ziOJKlRfQs9M6/ps/5uWqc1SpKWkZ8UlaRCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVIi+hR4R90TEixHxdI/1ERF3RcSBiNgbERc3H1OS1E+dV+hfADYtsP5yYH112QJ8dumxJEmD6lvomfko8P0FhlwJ3JstjwPnRMSapgJKkuqJzOw/KGId8GBmXtRl3YPAHZn5WHX7YeC2zNzVZewWWq/imZyc3Dg7O7uo0PteOLao+y1k8kw4erzxzS6ZueprMtOGtaua2RAwPz/PxMRErbHD2Ld7aZ+vJn/fpRhkrpZqkLluen9fynzPzMzszsypbutWLHqrp0SXZV3/lcjMHcAOgKmpqZyenl7UA96w7aFF3W8hWzec4M59TUxHs8xVX5OZDl473ch2AObm5qi7rw9j3+6lfb6a/H2XYpC5WqpB5rrp/X1Y893EWS6HgAvabp8PHG5gu5KkATRR6A8A11dnu1wGHMvMIw1sV5I0gL5/Q0TEF4FpYHVEHAI+BqwEyMztwE5gM3AAeAW4cVhhJUm99S30zLymz/oEbm4skSRpUfykqCQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVolahR8SmiHguIg5ExLYu66cj4lhE7KkutzcfVZK0kBX9BkTEGcBngHcDh4AnIuKBzHymY+i3MvOKIWSUJNVQ5xX6JcCBzH
w+M38MzAJXDjeWJGlQkZkLD4i4CtiUme+vbl8HXJqZt7SNmQbup/UK/jBwa2bu77KtLcAWgMnJyY2zs7OLCr3vhWOLut9CJs+Eo8cb3+ySmau+JjNtWLuqmQ0B8/PzTExM1Bo7jH27l/b5avL3XYpB5mqpBpnrpvf3pcz3zMzM7syc6rau7yEXILos6/xX4EngzZk5HxGbgb8H1p92p8wdwA6AqampnJ6ervHwp7th20OLut9Ctm44wZ376kzHaJmrviYzHbx2upHtAMzNzVF3Xx/Gvt1L+3w1+fsuxSBztVSDzHXT+/uw5rvOIZdDwAVtt8+n9Sr8/2Tmy5k5X13fCayMiNWNpZQk9VWn0J8A1kfEhRHxc8DVwAPtAyLijRER1fVLqu2+1HRYSVJvff+GyMwTEXEL8HXgDOCezNwfETdV67cDVwEfiIgTwHHg6ux3cF6S1KhaB4Wqwyg7O5Ztb7t+N3B3s9EkSYPwk6KSVAgLXZIKYaFLUiEsdEkqhIUuSYWw0CWpEBa6JBXCQpekQljoklQIC12SCmGhS1IhLHRJKoSFLkmFsNAlqRAWuiQVwkKXpEJY6JJUCAtdkgphoUtSISx0SSqEhS5JhbDQJakQFrokFcJCl6RCWOiSVAgLXZIKUavQI2JTRDwXEQciYluX9RERd1Xr90bExc1HlSQtpG+hR8QZwGeAy4G3AtdExFs7hl0OrK8uW4DPNpxTktRHnVfolwAHMvP5zPwxMAtc2THmSuDebHkcOCci1jScVZK0gMjMhQdEXAVsysz3V7evAy7NzFvaxjwI3JGZj1W3HwZuy8xdHdvaQusVPMCvAs819Ys0YDXwveUO0YW56hvHTGCuQYxjJhivXG/OzPO6rVhR487RZVnnvwJ1xpCZO4AdNR5z5CJiV2ZOLXeOTuaqbxwzgbkGMY6ZYHxzdapzyOUQcEHb7fOBw4sYI0kaojqF/gSwPiIujIifA64GHugY8wBwfXW2y2XAscw80nBWSdIC+h5yycwTEXEL8HXgDOCezNwfETdV67cDO4HNwAHgFeDG4UUemrE8FIS5BjGOmcBcgxjHTDC+uV6l75uikqSfDX5SVJIKYaFLUikys5gLrTNtHgGeBfYDH6yWfxx4AdhTXTa33efDtI79Pwf8dtvyjcC+at1dnDo89fPAl6rl/wKsq5ntYLW9PcCuatnrgW8A361+njuqXLQ+B7Cn7fIy8KHlmCvgHuBF4Om2ZSOZG+B91WN8F3hfjVx/AXwH2At8FTinWr4OON42b9tHnGskz1uvXD0yfaktz0FgzzLMVa9OWPb9aygdOOwHGOUFWANcXF0/G/g3Wl9X8HHg1i7j3wo8VT0hFwL/DpxRrfs28Ou0zrH/GnB5tfyPT+6AtM74+VLNbAeB1R3LPglsq65vAz4x6lzV+DOA/wLevBxzBbwLuJhXl8HQ54bWf9TPVz/Pra6f2yfXe4AV1fVPtOVa1z6u4/cbRa6hP28L5eqWqSPHncDtyzBXvTph2fevoXTgMDe+3BfgH4B3L7Czfxj4cNvtr1dP2BrgO23LrwE+1z6mur6C1qfHokaWg5xe6M8Ba9p2vOdGnasa/x7gn6rryzJXdPxHPoq5aR9TrfsccM1CuTrW/R5w30LjRpVrFM9bv1wLzEEA/wmsX4656tEJY7F/NX0p9hh6RKwD3k7rTyCAW6pvgrwnIs6tlq2ltaOddKhatra63rn8VffJzBPAMeANNSIl8I8Rsbv6CgSAyazO169+/uIy5ILWq4ovtt1e7rmC0cxNr23V9Ye0XqmddGFE/GtEfDMi3tn22KPKNeznbbG53gkczczvti0b+Vx1dMLPwv41sCILPSImgPuBD2Xmy7S+/fGXgLcBR2j9+Qe9v7Jgoa8yqPU1B128IzMvpvXNlDdHxLsWGDuyXNWHxX4X+Ltq0TjM1UKazLHofBHxUeAEcF+16Ajwpsx8O/CnwN9GxOtGmGsUz9ti5+saXv2CYeRz1a
UTehmH+Vq04go9IlbSeuLuy8yvAGTm0cz8SWb+FPgrWt8gCb2/suBQdb1z+avuExErgFXA9/vlyszD1c8Xab2Zdglw9OS3UlY/Xxx1Llr/wDyZmUerfMs+V5VRzM2ivrIiIt4HXAFcm9Xf0pn5o8x8qbq+m9ax118ZVa4RPW8D56ru//u03jQ8mXWkc9WtExjj/WtJhnk8Z9QXWv8i3gv8ZcfyNW3X/wSYra7/Gq9+A+R5Tr0B8gRwGafeANlcLb+ZV78B8uUauc4Czm67/s/AJlpnTLS/MfPJUeaqxs4CNy73XHH6MeGhzw2tN6v+g9YbVudW11/fJ9cm4BngvI5x57XleAutM05eP8JcQ3/e+uXqzNQ2X99crrmidyeMxf7VeAcOc+OjvgC/QetPmr20nb4F/A2t04320vremfad/6O0XiE8R/WudbV8Cni6Wnc3p05Rei2twxMHaL3r/ZYaud5S7SRP0Tp16qPV8jcAD9M6penhjh1xFLl+AXgJWNW2bORzRevP8SPA/9B6VfNHo5obWsfBD1SXG2vkOkDruOjJ/evkf8h/UD23TwFPAr8z4lwjed565eqWqVr+BeCmjvyjnKtenbDs+9cwLn70X5IKUdwxdEn6/8pCl6RCWOiSVAgLXZIKYaFLUiEsdEkqhIUuSYX4X0awxPCjY0uaAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"car_sales[\"Odometer (KM)\"].hist()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "b30e787b",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "no numeric data to plot",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [36]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcar_sales\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPrice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_core.py:972\u001b[0m, in \u001b[0;36mPlotAccessor.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 969\u001b[0m label_name \u001b[38;5;241m=\u001b[39m label_kw \u001b[38;5;129;01mor\u001b[39;00m data\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[1;32m 970\u001b[0m data\u001b[38;5;241m.\u001b[39mcolumns \u001b[38;5;241m=\u001b[39m label_name\n\u001b[0;32m--> 972\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mplot_backend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_matplotlib/__init__.py:71\u001b[0m, in \u001b[0;36mplot\u001b[0;34m(data, kind, **kwargs)\u001b[0m\n\u001b[1;32m 69\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124max\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(ax, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft_ax\u001b[39m\u001b[38;5;124m\"\u001b[39m, ax)\n\u001b[1;32m 70\u001b[0m plot_obj \u001b[38;5;241m=\u001b[39m PLOT_CLASSES[kind](data, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 71\u001b[0m \u001b[43mplot_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 72\u001b[0m plot_obj\u001b[38;5;241m.\u001b[39mdraw()\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m plot_obj\u001b[38;5;241m.\u001b[39mresult\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_matplotlib/core.py:327\u001b[0m, in \u001b[0;36mMPLPlot.generate\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_args_adjust()\n\u001b[0;32m--> 327\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compute_plot_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_setup_subplots()\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_plot()\n",
"File \u001b[0;32m~/Documents/learning/machine_learning_course/sample_project/env/lib/python3.10/site-packages/pandas/plotting/_matplotlib/core.py:506\u001b[0m, in \u001b[0;36mMPLPlot._compute_plot_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[38;5;66;03m# no non-numeric frames or series allowed\u001b[39;00m\n\u001b[1;32m 505\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_empty:\n\u001b[0;32m--> 506\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mno numeric data to plot\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 508\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m=\u001b[39m numeric_data\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_convert_to_ndarray)\n",
"\u001b[0;31mTypeError\u001b[0m: no numeric data to plot"
]
}
],
"source": [
"# Price still holds text such as \"$4,000.00\", so pandas has no numeric data to plot.\n",
"# Catch the expected TypeError and show its message so Restart & Run All can continue.\n",
"try:\n",
"    car_sales[\"Price\"].plot()\n",
"except TypeError as err:\n",
"    print(f\"TypeError: {err}\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "bb9b5864",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_4212/3108854531.py:1: FutureWarning: The default value of regex will change from True to False in a future version.\n",
" car_sales[\"Price\"] = car_sales[\"Price\"].str.replace('[\\$\\,\\.]','').astype(int)\n"
]
}
],
"source": [
"# Strip \"$\", \",\" and \".\" from the price text so it can be cast to int.\n",
"# NOTE: the decimal point is removed as well, so \"$4,000.00\" becomes 400000 (the price in cents).\n",
"# regex=True is passed explicitly: pandas is changing the default to False, which would treat\n",
"# the pattern as literal text, leave the strings untouched and make astype(int) fail.\n",
"car_sales[\"Price\"] = car_sales[\"Price\"].str.replace(r\"[$,.]\", \"\", regex=True).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "37128899",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 400000\n",
"1 500000\n",
"2 700000\n",
"3 2200000\n",
"4 350000\n",
"5 450000\n",
"6 750000\n",
"7 700000\n",
"8 625000\n",
"9 970000\n",
"Name: Price, dtype: int64"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Price\"]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "962db850",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"car_sales[\"Price\"].plot()"
]
},
{
"cell_type": "markdown",
"id": "4413ac95",
"metadata": {},
"source": [
"## Manipulating Data"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "69eba555",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 toyota\n",
"1 honda\n",
"2 toyota\n",
"3 bmw\n",
"4 nissan\n",
"5 toyota\n",
"6 honda\n",
"7 honda\n",
"8 toyota\n",
"9 nissan\n",
"Name: Make, dtype: object"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Make\"].str.lower()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "ad6ee6b8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 Toyota White 150043 4 400000\n",
"1 Honda Red 87899 4 500000\n",
"2 Toyota Blue 32549 3 700000\n",
"3 BMW Black 11179 5 2200000\n",
"4 Nissan White 213095 4 350000\n",
"5 Toyota Green 99213 4 450000\n",
"6 Honda Blue 45698 4 750000\n",
"7 Honda Blue 54738 4 700000\n",
"8 Toyota White 60000 4 625000\n",
"9 Nissan White 31600 4 970000"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "cbbf9d5b",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Make\"] = car_sales[\"Make\"].str.lower()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "98b177e9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 toyota White 150043 4 400000\n",
"1 honda Red 87899 4 500000\n",
"2 toyota Blue 32549 3 700000\n",
"3 bmw Black 11179 5 2200000\n",
"4 nissan White 213095 4 350000\n",
"5 toyota Green 99213 4 450000\n",
"6 honda Blue 45698 4 750000\n",
"7 honda Blue 54738 4 700000\n",
"8 toyota White 60000 4 625000\n",
"9 nissan White 31600 4 970000"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "65b2f115",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price\n",
"0 toyota White 150043 4 400000\n",
"1 honda Red 87899 4 500000\n",
"2 toyota Blue 32549 3 700000\n",
"3 bmw Black 11179 5 2200000\n",
"4 nissan White 213095 4 350000\n",
"5 toyota Green 99213 4 450000\n",
"6 honda Blue 45698 4 750000\n",
"7 honda Blue 54738 4 700000\n",
"8 toyota White 60000 4 625000\n",
"9 nissan White 31600 4 970000"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "bd67bc4a",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing = pd.read_csv(\"car-sales-missing-data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "63f67964",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.0</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"2 Toyota Blue NaN 3.0 $7,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500\n",
"5 Toyota Green NaN 4.0 $4,500\n",
"6 Honda NaN NaN 4.0 $7,500\n",
"7 Honda Blue NaN 4.0 NaN\n",
"8 Toyota White 60000.0 NaN NaN\n",
"9 NaN White 31600.0 4.0 $9,700"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "97464074",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"92302.66666666667"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing[\"Odometer\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "8ba91fcd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 150043.000000\n",
"1 87899.000000\n",
"2 92302.666667\n",
"3 11179.000000\n",
"4 213095.000000\n",
"5 92302.666667\n",
"6 92302.666667\n",
"7 92302.666667\n",
"8 60000.000000\n",
"9 31600.000000\n",
"Name: Odometer, dtype: float64"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing[\"Odometer\"].fillna(car_sales_missing[\"Odometer\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "4b053d9f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.0</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"2 Toyota Blue NaN 3.0 $7,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500\n",
"5 Toyota Green NaN 4.0 $4,500\n",
"6 Honda NaN NaN 4.0 $7,500\n",
"7 Honda Blue NaN 4.0 NaN\n",
"8 Toyota White 60000.0 NaN NaN\n",
"9 NaN White 31600.0 4.0 $9,700"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "8bc146ad",
"metadata": {},
"outputs": [],
"source": [
"# Fill the missing odometer readings with the column mean and assign the result back.\n",
"# fillna(inplace=True) on car_sales_missing[\"Odometer\"] is chained assignment: the selection may\n",
"# be a temporary copy, so pandas 2.x warns about it and copy-on-write turns it into a no-op.\n",
"car_sales_missing[\"Odometer\"] = car_sales_missing[\"Odometer\"].fillna(\n",
"    car_sales_missing[\"Odometer\"].mean()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "8f6a17f8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.000000</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500\n",
"6 Honda NaN 92302.666667 4.0 $7,500\n",
"7 Honda Blue 92302.666667 4.0 NaN\n",
"8 Toyota White 60000.000000 NaN NaN\n",
"9 NaN White 31600.000000 4.0 $9,700"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "7059b936",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "36e52a55",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.000000</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500\n",
"6 Honda NaN 92302.666667 4.0 $7,500\n",
"7 Honda Blue 92302.666667 4.0 NaN\n",
"8 Toyota White 60000.000000 NaN NaN\n",
"9 NaN White 31600.000000 4.0 $9,700"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "48a80b90",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "23c87cea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.000000</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.000000</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>92302.666667</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.000000</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.000000</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>92302.666667</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.000000 4.0 $4,000\n",
"1 Honda Red 87899.000000 4.0 $5,000\n",
"2 Toyota Blue 92302.666667 3.0 $7,000\n",
"3 BMW Black 11179.000000 5.0 $22,000\n",
"4 Nissan White 213095.000000 4.0 $3,500\n",
"5 Toyota Green 92302.666667 4.0 $4,500"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "07ef69f1",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing = pd.read_csv(\"car-sales-missing-data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "1392f357",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Toyota</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>$7,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Toyota</td>\n",
" <td>Green</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$4,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Honda</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>$7,500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Honda</td>\n",
" <td>Blue</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>60000.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>White</td>\n",
" <td>31600.0</td>\n",
" <td>4.0</td>\n",
" <td>$9,700</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"2 Toyota Blue NaN 3.0 $7,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500\n",
"5 Toyota Green NaN 4.0 $4,500\n",
"6 Honda NaN NaN 4.0 $7,500\n",
"7 Honda Blue NaN 4.0 NaN\n",
"8 Toyota White 60000.0 NaN NaN\n",
"9 NaN White 31600.0 4.0 $9,700"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "2c141c98",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing_dropped = car_sales_missing.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "620ef1af",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Toyota</td>\n",
" <td>White</td>\n",
" <td>150043.0</td>\n",
" <td>4.0</td>\n",
" <td>$4,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Honda</td>\n",
" <td>Red</td>\n",
" <td>87899.0</td>\n",
" <td>4.0</td>\n",
" <td>$5,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BMW</td>\n",
" <td>Black</td>\n",
" <td>11179.0</td>\n",
" <td>5.0</td>\n",
" <td>$22,000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Nissan</td>\n",
" <td>White</td>\n",
" <td>213095.0</td>\n",
" <td>4.0</td>\n",
" <td>$3,500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer Doors Price\n",
"0 Toyota White 150043.0 4.0 $4,000\n",
"1 Honda Red 87899.0 4.0 $5,000\n",
"3 BMW Black 11179.0 5.0 $22,000\n",
"4 Nissan White 213095.0 4.0 $3,500"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_missing_dropped"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "781e9bf0",
"metadata": {},
"outputs": [],
"source": [
"car_sales_missing_dropped.to_csv(\"car-sales-missing-dropped.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "9bccd7ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats\n",
"0 toyota White 150043 4 400000 5.0\n",
"1 honda Red 87899 4 500000 5.0\n",
"2 toyota Blue 32549 3 700000 5.0\n",
"3 bmw Black 11179 5 2200000 5.0\n",
"4 nissan White 213095 4 350000 5.0\n",
"5 toyota Green 99213 4 450000 NaN\n",
"6 honda Blue 45698 4 750000 NaN\n",
"7 honda Blue 54738 4 700000 NaN\n",
"8 toyota White 60000 4 625000 NaN\n",
"9 nissan White 31600 4 970000 NaN"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column from series\n",
"seats_column = pd.Series([5, 5, 5, 5, 5,])\n",
"\n",
"# New column called seats\n",
"car_sales[\"Seats\"] = seats_column\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "c4f2c1e4",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Seats\"].fillna(5, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "8de131a3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats\n",
"0 toyota White 150043 4 400000 5.0\n",
"1 honda Red 87899 4 500000 5.0\n",
"2 toyota Blue 32549 3 700000 5.0\n",
"3 bmw Black 11179 5 2200000 5.0\n",
"4 nissan White 213095 4 350000 5.0\n",
"5 toyota Green 99213 4 450000 5.0\n",
"6 honda Blue 45698 4 750000 5.0\n",
"7 honda Blue 54738 4 700000 5.0\n",
"8 toyota White 60000 4 625000 5.0\n",
"9 nissan White 31600 4 970000 5.0"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "8b823fad",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM\n",
"0 toyota White 150043 4 400000 5.0 7.5\n",
"1 honda Red 87899 4 500000 5.0 9.2\n",
"2 toyota Blue 32549 3 700000 5.0 5.0\n",
"3 bmw Black 11179 5 2200000 5.0 9.6\n",
"4 nissan White 213095 4 350000 5.0 8.7\n",
"5 toyota Green 99213 4 450000 5.0 4.7\n",
"6 honda Blue 45698 4 750000 5.0 7.6\n",
"7 honda Blue 54738 4 700000 5.0 8.7\n",
"8 toyota White 60000 4 625000 5.0 3.0\n",
"9 nissan White 31600 4 970000 5.0 4.5"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column from Python list\n",
"fuel_economy = [7.5, 9.2, 5.0, 9.6, 8.7, 4.7, 7.6, 8.7, 3.0, 4.5]\n",
"car_sales[\"Fuel per 100KM\"] = fuel_economy\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "9a181add",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Total fuel used (L)\"] = car_sales[\"Odometer (KM)\"]/100 * car_sales[\"Fuel per 100KM\"]"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "20131395",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used</th>\n",
" <th>Total fuel used (L)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>11253.225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>8086.708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>1627.450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>1073.184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>18539.265</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4663.011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>3473.048</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4762.206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>1800.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>1422.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"4 nissan White 213095 4 350000 5.0 8.7 \n",
"5 toyota Green 99213 4 450000 5.0 4.7 \n",
"6 honda Blue 45698 4 750000 5.0 7.6 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"\n",
" Total fuel used Total fuel used (L) \n",
"0 11253.225 11253.225 \n",
"1 8086.708 8086.708 \n",
"2 1627.450 1627.450 \n",
"3 1073.184 1073.184 \n",
"4 18539.265 18539.265 \n",
"5 4663.011 4663.011 \n",
"6 3473.048 3473.048 \n",
"7 4762.206 4762.206 \n",
"8 1800.000 1800.000 \n",
"9 1422.000 1422.000 "
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "d9f81e85",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"4 nissan White 213095 4 350000 5.0 8.7 \n",
"5 toyota Green 99213 4 450000 5.0 4.7 \n",
"6 honda Blue 45698 4 750000 5.0 7.6 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"\n",
" Total fuel used Total fuel used (L) Number of wheels \n",
"0 11253.225 11253.225 4 \n",
"1 8086.708 8086.708 4 \n",
"2 1627.450 1627.450 4 \n",
"3 1073.184 1073.184 4 \n",
"4 18539.265 18539.265 4 \n",
"5 4663.011 4663.011 4 \n",
"6 3473.048 3473.048 4 \n",
"7 4762.206 4762.206 4 \n",
"8 1800.000 1800.000 4 \n",
"9 1422.000 1422.000 4 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a column from a single value\n",
"car_sales[\"Number of wheels\"] = 4\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "6859af53",
"metadata": {},
"outputs": [],
"source": [
"car_sales[\"Passed road safety\"] = True"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "a289dd85",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Make object\n",
"Colour object\n",
"Odometer (KM) int64\n",
"Doors int64\n",
"Price int64\n",
"Seats float64\n",
"Fuel per 100KM float64\n",
"Total fuel used float64\n",
"Total fuel used (L) float64\n",
"Number of wheels int64\n",
"Passed road safety bool\n",
"dtype: object"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "a7c11bc0",
"metadata": {},
"outputs": [],
"source": [
"car_sales = car_sales.drop(\"Total fuel used\", axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "15d32814",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"4 nissan White 213095 4 350000 5.0 8.7 \n",
"5 toyota Green 99213 4 450000 5.0 4.7 \n",
"6 honda Blue 45698 4 750000 5.0 7.6 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 11253.225 4 True \n",
"1 8086.708 4 True \n",
"2 1627.450 4 True \n",
"3 1073.184 4 True \n",
"4 18539.265 4 True \n",
"5 4663.011 4 True \n",
"6 3473.048 4 True \n",
"7 4762.206 4 True \n",
"8 1800.000 4 True \n",
"9 1422.000 4 True "
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "34c60bf4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"1 honda Red 87899 4 500000 5.0 9.2 \n",
"9 nissan White 31600 4 970000 5.0 4.5 \n",
"7 honda Blue 54738 4 700000 5.0 8.7 \n",
"2 toyota Blue 32549 3 700000 5.0 5.0 \n",
"3 bmw Black 11179 5 2200000 5.0 9.6 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"1 8086.708 4 True \n",
"9 1422.000 4 True \n",
"7 4762.206 4 True \n",
"2 1627.450 4 True \n",
"3 1073.184 4 True "
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales.sample(frac=0.5)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "6ae2e87d",
"metadata": {},
"outputs": [],
"source": [
"car_sales_shuffled = car_sales.sample(frac=1)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "46c3d98c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 toyota White 150043 4 400000 5.0 7.5 \n",
"8 toyota White 60000 4 625000 5.0 3.0 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 11253.225 4 True \n",
"8 1800.000 4 True "
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales_shuffled.sample(frac=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "bf846d5d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 nissan White 213095 4 350000 5.0 8.7 \n",
"1 honda Blue 54738 4 700000 5.0 8.7 \n",
"2 nissan White 31600 4 970000 5.0 4.5 \n",
"3 toyota Blue 32549 3 700000 5.0 5.0 \n",
"4 honda Blue 45698 4 750000 5.0 7.6 \n",
"5 toyota White 60000 4 625000 5.0 3.0 \n",
"6 honda Red 87899 4 500000 5.0 9.2 \n",
"7 bmw Black 11179 5 2200000 5.0 9.6 \n",
"8 toyota White 150043 4 400000 5.0 7.5 \n",
"9 toyota Green 99213 4 450000 5.0 4.7 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 18539.265 4 True \n",
"1 4762.206 4 True \n",
"2 1422.000 4 True \n",
"3 1627.450 4 True \n",
"4 3473.048 4 True \n",
"5 1800.000 4 True \n",
"6 8086.708 4 True \n",
"7 1073.184 4 True \n",
"8 11253.225 4 True \n",
"9 4663.011 4 True "
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales = car_sales_shuffled.reset_index(drop=True)\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "b9220ca6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>213095</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>54738</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>31600</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>32549</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>45698</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>87899</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>11179</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>150043</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>99213</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 nissan White 213095 4 350000 5.0 8.7 \n",
"1 honda Blue 54738 4 700000 5.0 8.7 \n",
"2 nissan White 31600 4 970000 5.0 4.5 \n",
"3 toyota Blue 32549 3 700000 5.0 5.0 \n",
"4 honda Blue 45698 4 750000 5.0 7.6 \n",
"5 toyota White 60000 4 625000 5.0 3.0 \n",
"6 honda Red 87899 4 500000 5.0 9.2 \n",
"7 bmw Black 11179 5 2200000 5.0 9.6 \n",
"8 toyota White 150043 4 400000 5.0 7.5 \n",
"9 toyota Green 99213 4 450000 5.0 4.7 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 18539.265 4 True \n",
"1 4762.206 4 True \n",
"2 1422.000 4 True \n",
"3 1627.450 4 True \n",
"4 3473.048 4 True \n",
"5 1800.000 4 True \n",
"6 8086.708 4 True \n",
"7 1073.184 4 True \n",
"8 11253.225 4 True \n",
"9 4663.011 4 True "
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "bdcc6512",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Colour</th>\n",
" <th>Odometer (KM)</th>\n",
" <th>Doors</th>\n",
" <th>Price</th>\n",
" <th>Seats</th>\n",
" <th>Fuel per 100KM</th>\n",
" <th>Total fuel used (L)</th>\n",
" <th>Number of wheels</th>\n",
" <th>Passed road safety</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>133184.375</td>\n",
" <td>4</td>\n",
" <td>350000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>18539.265</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>34211.250</td>\n",
" <td>4</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>8.7</td>\n",
" <td>4762.206</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>nissan</td>\n",
" <td>White</td>\n",
" <td>19750.000</td>\n",
" <td>4</td>\n",
" <td>970000</td>\n",
" <td>5.0</td>\n",
" <td>4.5</td>\n",
" <td>1422.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>toyota</td>\n",
" <td>Blue</td>\n",
" <td>20343.125</td>\n",
" <td>3</td>\n",
" <td>700000</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1627.450</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>honda</td>\n",
" <td>Blue</td>\n",
" <td>28561.250</td>\n",
" <td>4</td>\n",
" <td>750000</td>\n",
" <td>5.0</td>\n",
" <td>7.6</td>\n",
" <td>3473.048</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>37500.000</td>\n",
" <td>4</td>\n",
" <td>625000</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1800.000</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>honda</td>\n",
" <td>Red</td>\n",
" <td>54936.875</td>\n",
" <td>4</td>\n",
" <td>500000</td>\n",
" <td>5.0</td>\n",
" <td>9.2</td>\n",
" <td>8086.708</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bmw</td>\n",
" <td>Black</td>\n",
" <td>6986.875</td>\n",
" <td>5</td>\n",
" <td>2200000</td>\n",
" <td>5.0</td>\n",
" <td>9.6</td>\n",
" <td>1073.184</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>toyota</td>\n",
" <td>White</td>\n",
" <td>93776.875</td>\n",
" <td>4</td>\n",
" <td>400000</td>\n",
" <td>5.0</td>\n",
" <td>7.5</td>\n",
" <td>11253.225</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>toyota</td>\n",
" <td>Green</td>\n",
" <td>62008.125</td>\n",
" <td>4</td>\n",
" <td>450000</td>\n",
" <td>5.0</td>\n",
" <td>4.7</td>\n",
" <td>4663.011</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Colour Odometer (KM) Doors Price Seats Fuel per 100KM \\\n",
"0 nissan White 133184.375 4 350000 5.0 8.7 \n",
"1 honda Blue 34211.250 4 700000 5.0 8.7 \n",
"2 nissan White 19750.000 4 970000 5.0 4.5 \n",
"3 toyota Blue 20343.125 3 700000 5.0 5.0 \n",
"4 honda Blue 28561.250 4 750000 5.0 7.6 \n",
"5 toyota White 37500.000 4 625000 5.0 3.0 \n",
"6 honda Red 54936.875 4 500000 5.0 9.2 \n",
"7 bmw Black 6986.875 5 2200000 5.0 9.6 \n",
"8 toyota White 93776.875 4 400000 5.0 7.5 \n",
"9 toyota Green 62008.125 4 450000 5.0 4.7 \n",
"\n",
" Total fuel used (L) Number of wheels Passed road safety \n",
"0 18539.265 4 True \n",
"1 4762.206 4 True \n",
"2 1422.000 4 True \n",
"3 1627.450 4 True \n",
"4 3473.048 4 True \n",
"5 1800.000 4 True \n",
"6 8086.708 4 True \n",
"7 1073.184 4 True \n",
"8 11253.225 4 True \n",
"9 4663.011 4 True "
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_sales[\"Odometer (KM)\"] = car_sales[\"Odometer (KM)\"].apply(lambda x: x / 1.6)\n",
"car_sales"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d3da940",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}