From fc3073b7fa5867fe3ab4654dff76f6031b6151b4 Mon Sep 17 00:00:00 2001 From: ghassan alabsi Date: Thu, 26 Jan 2023 06:44:25 +0100 Subject: [PATCH] late submission 26/Jan/2023 --- assignment.ipynb | 564 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 564 insertions(+) create mode 100644 assignment.ipynb diff --git a/assignment.ipynb b/assignment.ipynb new file mode 100644 index 0000000..5901301 --- /dev/null +++ b/assignment.ipynb @@ -0,0 +1,564 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.rcParams['figure.figsize'] = (10, 8)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryfood_categoryconsumptionco2_emission
1Argentinapork10.5137.20
2Argentinapoultry38.6641.53
3Argentinabeef55.481712.00
4Argentinalamb_goat1.5654.63
5Argentinafish4.366.96
\n", + "
" + ], + "text/plain": [ + " country food_category consumption co2_emission\n", + "1 Argentina pork 10.51 37.20\n", + "2 Argentina poultry 38.66 41.53\n", + "3 Argentina beef 55.48 1712.00\n", + "4 Argentina lamb_goat 1.56 54.63\n", + "5 Argentina fish 4.36 6.96" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "food_consumption = pd.read_csv('food_consumption.csv', index_col=0)\n", + "food_consumption.head()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#filter for Belgium\n", + "be_consumption = food_consumption[food_consumption['country'] == 'Belgium']\n", + "\n", + "# Filter for USA\n", + "usa_consumption = food_consumption[food_consumption['country'] == 'USA']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Q-1) Calculate mean and median consumption in Belgium" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "42.13272727272727\n", + "12.59\n" + ] + } + ], + "source": [ + "print (be_consumption['consumption'].mean())\n", + "print(be_consumption['consumption'].median())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Q-2) Calculate mean and median consumption of USA" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "44.650000000000006\n", + "14.58\n" + ] + } + ], + "source": [ + "print (usa_consumption['consumption'].mean())\n", + "print(usa_consumption['consumption'].median())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Work with both countries together\n", + "be_and_usa = food_consumption[(food_consumption['country'] == 'Belgium') | \n", + " 
(food_consumption['country'] == 'USA')]\n", + "\n", + "# Q-3) Group by country, select consumption column, and compute mean and median\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanmedian
country
Belgium42.13272712.59
USA44.65000014.58
\n", + "
" + ], + "text/plain": [ + " mean median\n", + "country \n", + "Belgium 42.132727 12.59\n", + "USA 44.650000 14.58" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "be_and_usa = food_consumption[(food_consumption['country'] == 'Belgium') | \n", + " (food_consumption['country'] == 'USA')]\n", + "be_and_usa.groupby('country')['consumption'].agg(['mean','median'])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "rice_consumption = food_consumption[food_consumption['food_category'] == 'rice']\n", + "\n", + "Q-4)Plot the histogram of co2_emission for rice" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([77., 16., 13., 6., 5., 5., 3., 2., 1., 2.]),\n", + " array([ 1.22 , 23.074, 44.928, 66.782, 88.636, 110.49 , 132.344,\n", + " 154.198, 176.052, 197.906, 219.76 ]),\n", + " )" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(food_consumption.loc[food_consumption['food_category'] == 'rice', 'co2_emission'], edgecolor='black')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Q-5) Calculate mean and median of co2_emission with .agg()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 37.591615\n", + "median 15.200000\n", + "Name: co2_emission, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rice_consumption = food_consumption[food_consumption['food_category'] == 'rice']\n", + "rice_consumption['co2_emission'].agg(['mean', 'median'])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Q-6) Calculate the quintiles of co2_emission\n", + "print(np.quantile(missing part, np.linspace(0, 1, 6)))\n", + "\n", + "You only need to fill in the missing part." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0. 3.54 11.026 25.59 99.978 1712. ]\n" + ] + } + ], + "source": [ + "print(np.quantile(food_consumption['co2_emission'], np.linspace(0, 1, 6)))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Q-7) Calculate the variance and standard deviation of co2_emission \n", + "for food_categories" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
varstd
food_category
beef88748.408132297.906710
dairy17671.891985132.935669
eggs21.3718194.622966
fish921.63734930.358481
lamb_goat16475.518363128.356996
nuts35.6396525.969895
pork3094.96353755.632396
poultry245.02680115.653332
rice2281.37624347.763754
soybeans0.8798820.938020
wheat71.0239378.427570
\n", + "
" + ], + "text/plain": [ + " var std\n", + "food_category \n", + "beef 88748.408132 297.906710\n", + "dairy 17671.891985 132.935669\n", + "eggs 21.371819 4.622966\n", + "fish 921.637349 30.358481\n", + "lamb_goat 16475.518363 128.356996\n", + "nuts 35.639652 5.969895\n", + "pork 3094.963537 55.632396\n", + "poultry 245.026801 15.653332\n", + "rice 2281.376243 47.763754\n", + "soybeans 0.879882 0.938020\n", + "wheat 71.023937 8.427570" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "food_consumption.groupby('food_category')['co2_emission'].agg(['var','std'])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Q-8) Create histogram of co2_emission for food_category 'beef'" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([77., 16., 13., 6., 5., 5., 3., 2., 1., 2.]),\n", + " array([ 1.22 , 23.074, 44.928, 66.782, 88.636, 110.49 , 132.344,\n", + " 154.198, 176.052, 197.906, 219.76 ]),\n", + " )" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "beef_consumption = food_consumption[food_consumption['food_category'] == 'beef']\n", + "\n", + "plt.hist(beef_consumption['co2_emission'], edgecolor='black')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "4f58e81c2e092e1deee7556cb116668ef0b81c0e3f6aa3662faaf88ffce4bde0" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}