# Parallel computing and large dataset handling using Dask.
#
# Builds a small synthetic hourly weather table in pandas, splits it into a
# partitioned Dask DataFrame, and computes the mean temperature for each
# calendar day.
import dask.dataframe as dd
import pandas as pd
import numpy as np

# Generate sample data: 10,000 hourly timestamps starting 2023-01-01.
dates = pd.date_range(start="2023-01-01", periods=10000, freq="h")
# Normally distributed values centred on 20 with a standard deviation of 10.
temperature = 20 + 10 * np.random.randn(len(dates))
# Uniformly distributed values in [0, 1).
precipitation = np.random.rand(len(dates))

# Create a Pandas DataFrame
df = pd.DataFrame({
    "date": dates,
    "temperature": temperature,
    "precipitation": precipitation,
})

# Convert the Pandas DataFrame to a Dask DataFrame split into 10 partitions.
ddf = dd.from_pandas(df, npartitions=10)

# Calculate the mean temperature per day.
# Dropping the time component makes every hourly row of a given day share one
# group key.
ddf["date"] = ddf["date"].dt.date  # Convert to date only (without time)
# Dask is lazy: .compute() runs the task graph and returns a pandas Series
# indexed by date.
mean_temp_per_day = ddf.groupby("date").temperature.mean().compute()

print("Mean Temperature Per Day:")
# Print explicitly: a bare expression is only echoed in a REPL/notebook and
# would be silently discarded when this file runs as a script.
print(mean_temp_per_day.head())