import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the page-view series; the 'date' column is parsed and used as the index.
df = pd.read_csv("fcc-forum-pageviews.csv", index_col="date", parse_dates=True)

# Keep only the rows whose 'value' lies between the 2.5th and 97.5th
# percentiles (both bounds inclusive), discarding the extreme days.
df = df[df["value"].between(df["value"].quantile(0.025), df["value"].quantile(0.975))]


def draw_line_plot():
    """Plot the cleaned daily page views as a red line.

    The figure is written to ``line_plot.png`` and also returned.
    """
    figure, axis = plt.subplots(figsize=(12, 6))
    axis.plot(df.index, df["value"], color="red", linewidth=1)
    axis.set(
        title="Daily freeCodeCamp Forum Page Views 5/2016-12/2019",
        xlabel="Date",
        ylabel="Page Views",
    )

    figure.savefig("line_plot.png")
    return figure


def draw_bar_plot():
    """Plot the mean page views per month as bars grouped by year.

    The figure is written to ``bar_plot.png`` and also returned.
    """
    # One row per year, one column per calendar month, cells = mean of 'value'.
    monthly_means = (
        df.assign(year=df.index.year, month=df.index.month)
        .groupby(["year", "month"])["value"]
        .mean()
        .unstack()
    )

    axis = monthly_means.plot.bar(figsize=(10, 6))
    axis.set_xlabel("Years")
    axis.set_ylabel("Average Page Views")
    # Legend entries are assigned positionally to the month columns.
    axis.legend(
        title="Months",
        labels=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    )

    figure = axis.figure
    figure.savefig("bar_plot.png")
    return figure


def draw_box_plot():
    """Draw side-by-side box plots of page views per year and per month.

    The figure is written to ``box_plot.png`` and also returned.
    """
    records = df.reset_index()
    dates = records["date"]
    # Sorting by the numeric month makes the abbreviated month labels first
    # appear in calendar order, which fixes the category order on the x axis.
    records = records.assign(
        year=dates.dt.year,
        month=dates.dt.strftime("%b"),
        month_num=dates.dt.month,
    ).sort_values("month_num")

    figure, axes = plt.subplots(1, 2, figsize=(15, 6))
    panels = (
        ("year", "Year-wise Box Plot (Trend)", "Year"),
        ("month", "Month-wise Box Plot (Seasonality)", "Month"),
    )
    for axis, (column, title, x_label) in zip(axes, panels):
        sns.boxplot(x=column, y="value", data=records, ax=axis)
        axis.set_title(title)
        axis.set_xlabel(x_label)
        axis.set_ylabel("Page Views")

    figure.savefig("box_plot.png")
    return figure


if __name__ == "__main__":
    draw_line_plot()
    draw_bar_plot()
    draw_box_plot()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv("medical_examination.csv")

# Flag rows whose BMI (weight / (height / 100) ** 2) exceeds 25 as overweight.
df['overweight'] = (df['weight'] / (df['height'] / 100) ** 2 > 25).astype(int)

# Collapse cholesterol and glucose to binary: a value of 1 becomes 0,
# every other value becomes 1.
for column in ('cholesterol', 'gluc'):
    df[column] = df[column].ne(1).astype('int64')
del column  # do not leave the loop variable behind as a module attribute


def draw_cat_plot():
    """Draw bar charts of the binary feature counts, one panel per ``cardio`` value.

    Returns the object produced by ``sns.catplot``.
    NOTE(review): that is the seaborn grid, not a bare matplotlib Figure as
    ``draw_heat_map`` returns -- confirm which one callers expect.
    """
    long_form = df.melt(
        id_vars=['cardio'],
        value_vars=['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight'],
    )
    # Count the rows for every (cardio, feature, feature value) combination.
    counts = (
        long_form.groupby(['cardio', 'variable', 'value'])
        .size()
        .reset_index(name='total')
    )

    fig = sns.catplot(
        data=counts,
        kind='bar',
        x='variable',
        y='total',
        hue='value',
        col='cardio',
    )
    return fig


def draw_heat_map():
    """Draw a lower-triangle correlation heat map of the filtered data.

    Rows are kept only when ``ap_lo`` does not exceed ``ap_hi`` and both
    height and weight lie within their 2.5th-97.5th percentile range
    (inclusive). Returns the matplotlib figure.
    """
    height, weight = df['height'], df['weight']
    pressure_ok = df['ap_lo'] <= df['ap_hi']
    height_ok = height.between(height.quantile(0.025), height.quantile(0.975))
    weight_ok = weight.between(weight.quantile(0.025), weight.quantile(0.975))
    selected = df[pressure_ok & height_ok & weight_ok]

    corr = selected.corr()
    # Hide the diagonal and the upper triangle of the correlation matrix.
    mask = np.triu(np.ones_like(corr, dtype=bool))

    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        fmt='.1f',
        ax=ax,
        cmap='coolwarm',
        vmax=0.3,
        center=0,
        square=True,
        linewidths=0.5,
        cbar_kws={"shrink": 0.5},
    )
    return fig


if __name__ == "__main__":
    draw_cat_plot()
    draw_heat_map()
import pandas as pd

# Column names for the headerless census file, in file order.
_COLUMN_NAMES = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
    'salary',
]


def _percentage(part, whole):
    """Return ``part / whole`` as a percentage rounded to one decimal place."""
    return round(part / whole * 100, 1)


def demographic_data_analyzer(data_path='adult.data'):
    """Compute summary statistics from a headerless census CSV file.

    Args:
        data_path: Path of the CSV file to read. Defaults to ``'adult.data'``.

    Returns:
        A dict with these keys:
        ``race_count`` (Series of row counts per race),
        ``average_age_men``,
        ``percentage_bachelors``,
        ``percentage_advanced_education_rich`` and
        ``percentage_not_advanced_education_rich`` (share earning ``>50K``
        with / without a Bachelors, Masters or Doctorate),
        ``min_work_hours``,
        ``rich_percentage`` (share earning ``>50K`` among rows working the
        minimum hours),
        ``highest_earning_country`` and
        ``highest_earning_country_percentage``,
        ``top_IN_occupation`` (most common occupation among rows from India
        earning ``>50K``).
        All averages and percentages are rounded to one decimal place.

    Raises:
        ZeroDivisionError: If one of the groups used as a denominator is empty.
        ValueError: If there is no row from India earning ``>50K``.
    """
    # Fields in the raw file are separated by ", ". Without skipinitialspace
    # every string value keeps a leading blank (' Male', ' >50K') and none of
    # the equality tests below would ever match.
    df = pd.read_csv(
        data_path,
        header=None,
        names=_COLUMN_NAMES,
        skipinitialspace=True,
    )

    is_rich = df['salary'] == '>50K'
    total_rows = len(df)

    race_count = df['race'].value_counts()
    average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)

    has_bachelors = df['education'] == 'Bachelors'
    percentage_bachelors = _percentage(int(has_bachelors.sum()), total_rows)

    advanced = df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])
    percentage_advanced_education_rich = _percentage(
        int((advanced & is_rich).sum()), int(advanced.sum())
    )
    percentage_not_advanced_education_rich = _percentage(
        int((~advanced & is_rich).sum()), int((~advanced).sum())
    )

    min_work_hours = df['hours-per-week'].min()
    works_min_hours = df['hours-per-week'] == min_work_hours
    rich_percentage = _percentage(
        int((works_min_hours & is_rich).sum()), int(works_min_hours.sum())
    )

    # Share of >50K earners per country. Countries without any such earner
    # come out as NaN, which max() and idxmax() skip.
    rich_share_by_country = (
        df.loc[is_rich, 'native-country'].value_counts()
        / df['native-country'].value_counts()
        * 100
    )
    highest_earning_country = rich_share_by_country.idxmax()
    highest_earning_country_percentage = round(rich_share_by_country.max(), 1)

    from_india = df['native-country'] == 'India'
    top_IN_occupation = (
        df.loc[from_india & is_rich, 'occupation'].value_counts().idxmax()
    )

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'percentage_advanced_education_rich': percentage_advanced_education_rich,
        'percentage_not_advanced_education_rich': percentage_not_advanced_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage': highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation,
    }


if __name__ == "__main__":
    results = demographic_data_analyzer()
    for key, value in results.items():
        print(f"{key}: {value}")