# Step 1: open a new figure 10 inches wide by 5 inches tall. Nothing is drawn
# on it in this cell; it only demonstrates the `figsize` argument.
plt.figure(figsize=(10, 5))

# Step 2: basic line plot of the `mean_fruit_vegetables` column against `year`.
survey_years = health_survey_summary['year']
fruit_veg_means = health_survey_summary['mean_fruit_vegetables']

plt.figure(figsize=(10, 5))
plt.plot(survey_years, fruit_veg_means)
plt.show()

# Same line plot as before, now with a circular marker at every data point.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(
    health_survey_summary['year'],
    health_survey_summary['mean_fruit_vegetables'],
    marker='o',
)
plt.show()

# Step 3: the marked line plot, with a title and axis labels added.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(
    health_survey_summary['year'],
    health_survey_summary['mean_fruit_vegetables'],
    marker='o',
)
ax.set_title('Mean Fruit & Vegetable Consumption Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Mean Fruit & Vegetable Consumption')
plt.show()

# Step 4: draw two series on the same axes -- `mean_fruit_vegetables`
# (circle markers) and `mean_alcohol` (cross markers) -- against `year`.
plt.figure(figsize=(10, 5))
plt.plot(health_survey_summary['year'], health_survey_summary['mean_fruit_vegetables'], marker='o')
plt.plot(health_survey_summary['year'], health_survey_summary['mean_alcohol'], marker='x')
plt.title('Mean Fruit & Vegetable vs Alcohol Consumption Over Years')
plt.xlabel('Year')
# The y-axis is shared by both series, so the label must not name only one.
plt.ylabel('Mean Consumption')
plt.show()

# Step 5: give each series a `label` and call `plt.legend()` so the two lines
# can be told apart.
plt.figure(figsize=(10, 5))
plt.plot(health_survey_summary['year'], health_survey_summary['mean_fruit_vegetables'], marker='o', label='Fruit and Vegetables')
plt.plot(health_survey_summary['year'], health_survey_summary['mean_alcohol'], marker='x', label='Alcohol')
plt.title('Mean Fruit & Vegetable vs Alcohol Consumption Over Years')
plt.xlabel('Year')
# The y-axis is shared by both series, so the label must not name only one.
plt.ylabel('Mean Consumption')
plt.legend()
plt.show()

# Step 6: same two-series plot, with the visible x-range restricted to
# 2016-2022 via `plt.xlim`.
plt.figure(figsize=(10, 5))
plt.plot(health_survey_summary['year'], health_survey_summary['mean_fruit_vegetables'], marker='o', label='Fruit and Vegetables')
plt.plot(health_survey_summary['year'], health_survey_summary['mean_alcohol'], marker='x', label='Alcohol')
plt.title('Mean Fruit & Vegetable vs Alcohol Consumption Over Years')
plt.xlabel('Year')
# The y-axis is shared by both series, so the label must not name only one.
plt.ylabel('Mean Consumption')
plt.xlim(2016, 2022)  # Set x-axis limits
plt.legend()
plt.show()

# Step 7: same two-series plot with explicit line colours (green for fruit
# and vegetables, red for alcohol).
plt.figure(figsize=(10, 5))
plt.plot(health_survey_summary['year'], health_survey_summary['mean_fruit_vegetables'], marker='o', label='Fruit and Vegetables', color='green')
plt.plot(health_survey_summary['year'], health_survey_summary['mean_alcohol'], marker='x', label='Alcohol', color='red')
plt.title('Mean Fruit & Vegetable vs Alcohol Consumption Over Years')
plt.xlabel('Year')
# The y-axis is shared by both series, so the label must not name only one.
plt.ylabel('Mean Consumption')
plt.legend()
plt.show()

# Scatter plot of `mental_wellbeing` against weekly alcohol units, one point
# per row of `health_survey_clean`.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(
    health_survey_clean['alcohol_consumption_mean_weekly_units'],
    health_survey_clean['mental_wellbeing'],
    label='Data Points',  # only displayed if a legend is requested
)
ax.set_title('Scatter Plot of Mean Alcohol Consumption vs Mental Wellbeing')
ax.set_xlabel('Mean Alcohol Consumption (Weekly Units)')
ax.set_ylabel('Mean Mental Wellbeing')
plt.show()

# Same scatter plot, with each point coloured by its `mental_wellbeing` value
# using the viridis colour map.
alcohol_units = health_survey_clean['alcohol_consumption_mean_weekly_units']
wellbeing = health_survey_clean['mental_wellbeing']

plt.figure(figsize=(10, 5))
plt.scatter(alcohol_units, wellbeing, c=wellbeing, cmap='viridis', label='Data Points')
plt.title('Scatter Plot of Mean Alcohol Consumption vs Mental Wellbeing')
plt.xlabel('Mean Alcohol Consumption (Weekly Units)')
plt.ylabel('Mean Mental Wellbeing')
plt.show()

# Trend line, step 1: least-squares fit of a degree-1 polynomial to wellbeing
# as a function of weekly alcohol units. `coef` holds the coefficients with
# the highest power first.
alcohol_units = health_survey_clean['alcohol_consumption_mean_weekly_units']
wellbeing = health_survey_clean['mental_wellbeing']
coef = np.polyfit(alcohol_units, wellbeing, deg=1)
print(coef)

[-0.0054244   3.29097147]

# Trend line, step 2: wrap the fitted coefficients in a callable polynomial
# so it can be evaluated at arbitrary x values.
slope, intercept = coef
p = np.poly1d([slope, intercept])
print(p)

 
-0.005424 x + 3.291

# Trend line, step 3: evenly spaced x values spanning the observed range of
# weekly alcohol units (50 samples, which is np.linspace's default).
alcohol_units = health_survey_clean['alcohol_consumption_mean_weekly_units']
x_range = np.linspace(alcohol_units.min(), alcohol_units.max(), num=50)

# Trend line, step 4: coloured scatter plot with the fitted polynomial `p`
# drawn over it.
fig, ax = plt.subplots(figsize=(10, 5))

# Raw observations, coloured by their wellbeing value.
ax.scatter(
    health_survey_clean['alcohol_consumption_mean_weekly_units'],
    health_survey_clean['mental_wellbeing'],
    c=health_survey_clean['mental_wellbeing'],
    cmap='viridis',
    label='Data Points',
)

# Fitted polynomial evaluated on the x grid.
ax.plot(x_range, p(x_range), color='red', linestyle='--', label='Trendline')

ax.set_title('Scatter Plot with Trendline of Mean Alcohol Consumption vs Mental Wellbeing')
ax.set_xlabel('Mean Alcohol Consumption (Weekly Units)')
ax.set_ylabel('Mean Mental Wellbeing')
ax.legend()
plt.show()

# Step 5: repeat the fit with a second-degree polynomial. `coef_quad` holds
# the coefficients with the highest power first.
alcohol_units = health_survey_clean['alcohol_consumption_mean_weekly_units']
wellbeing = health_survey_clean['mental_wellbeing']
coef_quad = np.polyfit(alcohol_units, wellbeing, deg=2)
print(coef_quad)

[ 3.49456328e-05 -8.32386556e-03  3.32052856e+00]

# Wrap the quadratic coefficients in a callable polynomial.
quad_term, linear_term, constant_term = coef_quad
p_quad = np.poly1d([quad_term, linear_term, constant_term])
print(p_quad)

           2
3.495e-05 x - 0.008324 x + 3.321

# Coloured scatter plot with the quadratic polynomial `p_quad` drawn over it.
fig, ax = plt.subplots(figsize=(10, 5))

# Raw observations, coloured by their wellbeing value.
ax.scatter(
    health_survey_clean['alcohol_consumption_mean_weekly_units'],
    health_survey_clean['mental_wellbeing'],
    c=health_survey_clean['mental_wellbeing'],
    cmap='viridis',
    label='Data Points',
)

# Quadratic fit evaluated on the x grid.
ax.plot(x_range, p_quad(x_range), color='red', linestyle='--', label='Quadratic Trendline')

ax.set_title('Scatter Plot with Quadratic Trendline of Mean Alcohol Consumption vs Mental Wellbeing')
ax.set_xlabel('Mean Alcohol Consumption (Weekly Units)')
ax.set_ylabel('Mean Mental Wellbeing')
ax.legend()
plt.show()

Exercise 4: Data Visualization and Plotting in Python

Basic Plots

Step 1: Set the Figure Size

Step 2: Create a Line Plot

Step 3: Add Title and Labels

Step 4: Plot Multiple Lines

Step 5: Add a Legend

Step 6: Set Axis Limits

Step 7: Change the Colours

Scatter Plots

Trend Lines

Step 1: Calculate the Coefficients for the Trend Line

Step 2: Generate the Polynomial Function

Step 3: Generate a Smooth Range of X-Values

Step 4: Plot the Trend Line

Step 5: Fit a Second-Degree Polynomial

Summary

Library documentation

Further Resources