"""Descriptive statistics for the food-consumption data set (exercises Q-1 .. Q-8).

Reads ``food_consumption.csv``, prints mean/median consumption for Belgium
and the USA, summary statistics of ``co2_emission`` and the countries whose
total emissions are IQR outliers, then shows one ``co2_emission`` histogram
each for rice and beef.

Usage::

    python main.py [path/to/food_consumption.csv]

Provenance: this is ``main.py`` as it stands after the two-commit series
"questions answered" (58f2182e, 7ad30ad8; Rumeysa Yakar, 2023-01-11), with
the review fixes described in the comments below.
"""
import sys

import numpy as np
import pandas as pd

# Location used when no path is given on the command line. It only exists on
# the original author's machine, so other users should pass their own path.
DEFAULT_CSV_PATH = '/Users/rumeysayakar/Desktop/derslik/statistic/Class7-MathStats-Module-Week16/food_consumption.csv'


def _rows_where(frame, column, value):
    """Return the rows of *frame* whose *column* equals *value*."""
    return frame[frame[column] == value]


def iqr_outliers(values, k=1.5):
    """Return the entries of *values* that fall outside the IQR fences.

    Args:
        values: numeric ``pandas.Series``.
        k: fence width in multiples of the inter-quartile range.

    Returns:
        The sub-series of *values* lying below ``q1 - k * iqr`` or above
        ``q3 + k * iqr``; the original index labels are kept.
    """
    q1 = np.quantile(values, 0.25)
    q3 = np.quantile(values, 0.75)
    iqr = q3 - q1

    # Lower and upper cutoffs for outliers.
    lower = q1 - k * iqr
    upper = q3 + k * iqr
    return values[(values > upper) | (values < lower)]


def compute_statistics(food_consumption):
    """Compute every printed result of the exercise, in print order.

    Args:
        food_consumption: ``pandas.DataFrame`` with the columns ``country``,
            ``food_category``, ``consumption`` and ``co2_emission``.

    Returns:
        A dict (insertion-ordered) mapping a result name to its value:
        scalars for the per-country mean/median, pandas objects for the
        grouped summaries and a NumPy array for the quintiles.
    """
    be_consumption = _rows_where(food_consumption, 'country', 'Belgium')
    usa_consumption = _rows_where(food_consumption, 'country', 'USA')
    be_and_usa = food_consumption[food_consumption['country'].isin(['Belgium', 'USA'])]
    rice_consumption = _rows_where(food_consumption, 'food_category', 'rice')
    emissions_by_country = food_consumption.groupby('country')['co2_emission'].sum()

    # Reductions are requested by name ('mean', 'var', ...) instead of passing
    # np.mean / np.var: recent pandas releases warn that NumPy callables given
    # to .agg() will stop being mapped to the pandas reductions, which for
    # var/std would switch from sample (ddof=1) to population (ddof=0) values.
    # Using pandas reductions everywhere also means missing values are
    # skipped the same way in every result.
    return {
        # Q-1) mean and median consumption in Belgium
        'mean_of_belgium': be_consumption['consumption'].mean(),
        'median_of_belgium': be_consumption['consumption'].median(),
        # Q-2) mean and median consumption in the USA
        'mean_of_usa': usa_consumption['consumption'].mean(),
        'median_of_usa': usa_consumption['consumption'].median(),
        # Q-3) the same figures for both countries via groupby
        'group_be_and_usa': be_and_usa.groupby('country')['consumption'].agg(['mean', 'median']),
        # Q-5) mean and median of co2_emission for rice
        'co2_mean_and_median': rice_consumption['co2_emission'].agg(['mean', 'median']),
        # Q-6) quintiles of co2_emission (six cut points from 0 to 1)
        'co2_quintiles': np.quantile(food_consumption['co2_emission'], np.linspace(0, 1, 6)),
        # Q-7) variance and standard deviation of co2_emission per food category
        'var_and_std': food_consumption.groupby('food_category')['co2_emission'].agg(['var', 'std']),
        # Total emissions per country and the countries that are IQR outliers
        'emissions_by_country': emissions_by_country,
        'outliers': iqr_outliers(emissions_by_country),
    }


def main(csv_path=DEFAULT_CSV_PATH):
    """Load the CSV at *csv_path*, print all statistics and show the histograms."""
    # Imported here rather than at module level so the statistics helpers
    # above stay importable where matplotlib is not installed.
    import matplotlib.pyplot as plt

    plt.rcParams['figure.figsize'] = (10, 8)

    food_consumption = pd.read_csv(csv_path, index_col=0)

    for result in compute_statistics(food_consumption).values():
        print(result)

    # Q-4 / Q-8) One figure per food category. Calling .hist() twice without
    # an explicit axes draws both histograms on top of each other.
    for category in ('rice', 'beef'):
        _, ax = plt.subplots()
        _rows_where(food_consumption, 'food_category', category)['co2_emission'].hist(ax=ax)
        ax.set_title(f'co2_emission: {category}')

    # Without this nothing is displayed when the file is run as a script.
    plt.show()


if __name__ == '__main__':
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV_PATH)