import pandas as pd
def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``, rounded to one decimal.

    Both counts are converted to plain ``int`` first so that an empty group
    (``whole == 0``) raises ``ZeroDivisionError`` instead of silently
    producing NaN through NumPy integer division.
    """
    return round(int(part) / int(whole) * 100, 1)


def demographic_data_analyzer(data_path: str = 'adult.data') -> dict:
    """Compute summary statistics from a headerless census income CSV file.

    Args:
        data_path: Path of the CSV file to read. The file must have no header
            row and 15 columns in the order listed in ``column_names`` below.
            Defaults to ``'adult.data'`` (relative to the working directory).

    Returns:
        A dict with these keys:

        * ``race_count`` - Series of row counts per ``race`` value.
        * ``average_age_men`` - mean ``age`` of rows whose ``sex`` is
          ``'Male'``, rounded to one decimal.
        * ``percentage_bachelors`` - percentage of rows whose ``education``
          is ``'Bachelors'``.
        * ``percentage_advanced_education_rich`` - percentage of rows with a
          Bachelors, Masters or Doctorate whose ``salary`` is ``'>50K'``.
        * ``percentage_not_advanced_education_rich`` - the same percentage
          for all remaining rows.
        * ``min_work_hours`` - smallest ``hours-per-week`` value.
        * ``rich_percentage`` - percentage of the rows working that minimum
          number of hours whose ``salary`` is ``'>50K'``.
        * ``highest_earning_country`` - ``native-country`` with the largest
          share of ``'>50K'`` rows.
        * ``highest_earning_country_percentage`` - that share, in percent.
        * ``top_IN_occupation`` - most frequent ``occupation`` among
          ``'>50K'`` rows whose ``native-country`` is ``'India'``.

    Raises:
        ZeroDivisionError: If one of the groups used as a percentage
            denominator contains no rows.
        ValueError: Raised by pandas ``idxmax`` when the selection it is
            applied to is empty (for example no ``'>50K'`` rows from India).
    """
    column_names = [
        'age', 'workclass', 'fnlwgt', 'education', 'education-num',
        'marital-status', 'occupation', 'relationship', 'race', 'sex',
        'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
        'salary',
    ]
    # skipinitialspace=True: when fields are separated by ", " (comma plus
    # space) every string value would otherwise keep a leading blank, and none
    # of the exact-match filters below ('Male', '>50K', 'India', ...) would
    # match. For files that use a bare "," this option changes nothing.
    df = pd.read_csv(
        data_path,
        header=None,
        names=column_names,
        skipinitialspace=True,
    )

    total_rows = len(df)
    # Boolean row masks reused by several statistics.
    is_rich = df['salary'] == '>50K'
    has_advanced_education = df['education'].isin(
        ['Bachelors', 'Masters', 'Doctorate']
    )
    lacks_advanced_education = ~has_advanced_education

    race_count = df['race'].value_counts()
    average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)
    percentage_bachelors = _percentage(
        (df['education'] == 'Bachelors').sum(), total_rows
    )

    percentage_advanced_education_rich = _percentage(
        (has_advanced_education & is_rich).sum(),
        has_advanced_education.sum(),
    )
    percentage_not_advanced_education_rich = _percentage(
        (lacks_advanced_education & is_rich).sum(),
        lacks_advanced_education.sum(),
    )

    min_work_hours = df['hours-per-week'].min()
    works_min_hours = df['hours-per-week'] == min_work_hours
    rich_percentage = _percentage(
        (works_min_hours & is_rich).sum(), works_min_hours.sum()
    )

    # Share of '>50K' rows per country, computed once. Countries without any
    # '>50K' row are absent from the numerator, so index alignment leaves NaN
    # for them; max()/idxmax() skip NaN by default.
    rich_by_country = df.loc[is_rich, 'native-country'].value_counts()
    rows_by_country = df['native-country'].value_counts()
    rich_share_by_country = rich_by_country / rows_by_country * 100
    highest_earning_country = rich_share_by_country.idxmax()
    highest_earning_country_percentage = round(rich_share_by_country.max(), 1)

    rich_in_india = is_rich & (df['native-country'] == 'India')
    top_IN_occupation = (
        df.loc[rich_in_india, 'occupation'].value_counts().idxmax()
    )

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'percentage_advanced_education_rich': percentage_advanced_education_rich,
        'percentage_not_advanced_education_rich': percentage_not_advanced_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage': highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation,
    }
if __name__ == "__main__":
    # Script entry point: run the analysis and print each statistic on its
    # own "name: value" line, in the order the result dict provides them.
    report = demographic_data_analyzer()
    for stat_name in report:
        print(f"{stat_name}: {report[stat_name]}")
# Click Run or press Shift + Enter to run the code.