# Data handling and numerics.
import numpy as np
import pandas as pd

# Plotting; the "ggplot" style sheet is activated for the figures drawn below.
import matplotlib.pyplot as plt
from matplotlib import style

style.use("ggplot")
# Demo frame: one row per skill with a randomly drawn rate.
skill_names = ['Python', 'SQL', 'Power BI', 'Excel']
skill = {
    'skill': skill_names,
    # randint's upper bound is exclusive, so rates fall in 76..99. The number
    # of draws follows the name list so both columns always have equal length.
    # The generator is not seeded here, so the rates differ from run to run.
    'rate': np.random.randint(76, 100, size=len(skill_names)),
}
data = pd.DataFrame(skill)
print(data)
skill rate 0 Python 83 1 SQL 76 2 Power BI 99 3 Excel 95
# Bar chart of the rate recorded for each skill.
plt.figure(figsize=(6, 4))
plt.bar(data['skill'], data['rate'], color='orange')
plt.title('Rate Of Skills')
plt.xticks(rotation=45, ha='right')
# tight_layout() adjusts the subplot parameters once, at call time, so it is
# called after the tick labels have been rotated; otherwise the rotated labels
# are not taken into account when the margins are computed.
plt.tight_layout()
plt.show()
# Load the sales workbook; the path is relative to the working directory.
file_path = 'jupyter sales analysis.xlsx'
sales = pd.read_excel(io=file_path)

# Preview the first three rows, then list the column labels.
sales.head(n=3)
sales.columns
Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Year', 'Month', 'Weekday', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit'], dtype='object')
# Columns removed from `sales`; the frame is modified in place.
dropped_columns = [
    'Row ID',
    'Order ID',
    'Customer ID',
    'Postal Code',
    'Product ID',
]
sales.drop(columns=dropped_columns, inplace=True)

# Preview the first three rows after the drop.
sales.head(n=3)

# Count of missing values per remaining column.
sales.isnull().sum()
Order Date 0 Ship Date 0 Year 0 Month 0 Weekday 0 Ship Mode 0 Customer Name 0 Segment 0 Country 0 City 0 State 0 Region 0 Category 0 Sub-Category 0 Product Name 0 Sales 0 Quantity 0 Discount 0 Profit 0 dtype: int64
# Histograms (20 bins each) of the Sales, Quantity, Discount and Profit columns.
measure_columns = ['Sales', 'Quantity', 'Discount', 'Profit']
hist_data = sales[measure_columns]
hist_data.hist(figsize=(12, 8), bins=20, color='orange')
plt.show()
# Total sales per state, sorted from largest to smallest, rounded to 2 decimals.
sales_by_state = sales.groupby('State')['Sales'].sum()
state_sales = sales_by_state.reset_index(name='Total Sales')
state_sales = state_sales.sort_values(by='Total Sales', ascending=False)
state_sales = state_sales.round(2)

# Keep only the first 20 rows, i.e. the 20 states with the highest totals.
state_sales = state_sales.head(20)
state_sales
# Horizontal bar chart of the per-state totals held in `state_sales`.
states = state_sales['State']
totals = state_sales['Total Sales']

plt.figure(figsize=(8, 5))
plt.barh(states, totals, color='orange')

# Write each total just past the end of its bar.
for state, sale in zip(states, totals):
    plt.text(sale, state, f"{sale}", ha='left', va='center')

plt.title('Top 20 States With Highest Sales', fontsize=15)
plt.show()
# Total sales per product category, rounded to two decimals.
products = (
    sales.groupby('Category')['Sales']
    .sum()
    .reset_index(name='Total Sales')
    .round(2)
)

# Series.sum() replaces the builtin sum(): it is the pandas idiom and avoids
# iterating over the column element by element in Python.
overall_sales = products['Total Sales'].sum()

# Each category's share of the overall total, in percent.
products['Percentage Sales'] = (
    (products['Total Sales'] / overall_sales) * 100
).round(2)
products
# Grand total written in the centre of the donut. float() keeps it a plain
# Python float so the f-string below renders it the same way as before.
total = round(float(products['Total Sales'].sum()), 2)
color = ['blue', 'green', 'orange']

# Only the first wedge is offset. The list is built from the row count because
# pie() requires `explode` to have exactly one entry per wedge.
explode = [0.02 if position == 0 else 0 for position in range(len(products))]

plt.figure(figsize=(8, 5))
plt.pie(
    products['Total Sales'],
    labels=products['Category'],
    autopct='%1.1f%%',
    colors=color,
    explode=explode,
    # A wedge width below the radius turns the pie into a donut.
    wedgeprops={'width': 0.4},
    textprops={'fontsize': 12},
)
plt.text(0, 0, f"Total\n{total}", ha='center', va='center', fontsize=17)
plt.title('Total Sales By Product Category')
plt.show()
# Display the current column labels of `sales`.
sales.columns
Index(['Order Date', 'Ship Date', 'Year', 'Month', 'Weekday', 'Ship Mode', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Region', 'Category', 'Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit'], dtype='object')
# Yearly totals of sales and profit.
year = sales.groupby('Year')[['Sales', 'Profit']].sum()
year

# Year-over-year profit change in percent. The first year has no predecessor,
# so pct_change() yields NaN there; it is replaced with 0.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on `year[col]`: that chained in-place call acts on
# an intermediate object and triggers pandas' chained-assignment FutureWarning.
year['% Profit Increase'] = (
    (year['Profit'].pct_change() * 100).round(2).fillna(0)
)
year
script.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method. The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy. For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object. year['% Profit Increase'].fillna(0, inplace = True)
def fibonacci_terms(count):
    """Return the first ``count`` Fibonacci numbers, starting from 0 and 1.

    A ``count`` of zero or less yields an empty list.
    """
    # Trim the two seed values so that a count of 0 or 1 is honoured instead
    # of always returning at least [0, 1].
    terms = [0, 1][:max(count, 0)]
    while len(terms) < count:
        terms.append(terms[-1] + terms[-2])
    return terms


num_terms = 10
fibonacci = fibonacci_terms(num_terms)
print(fibonacci)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]