# Spaces:
# Sleeping
# Sleeping
# Module scaffolding for a marimo notebook: the package import, a version
# stamp, and the application object that the `__main__` guard runs.
import marimo

# Version stamp (presumably the marimo release that wrote this file).
__generated_with = "0.23.3"

# Notebook application object.
# NOTE(review): in this copy none of the cell functions below carries an
# `@app.cell` decorator, so nothing visibly registers them with `app` --
# confirm against the original notebook.
app = marimo.App()
def _():
    # Dependency cell: imports the libraries used by the notebook and hands
    # the shared names to the other cells through the return tuple.
    import marimo as mo

    # Imported under the name `np`, but `np` is not part of the returned tuple.
    import numpy as np

    from matplotlib import pyplot as plt
    from matplotlib.patches import Rectangle

    return (Rectangle, mo, plt)
def _(mo):
    # Introductory cell: renders the notebook title, the statement of Bayes'
    # theorem and a short glossary of its four terms as markdown.
    #
    # The literal is raw (r"""...""") on purpose: in a plain string literal
    # "\f" is the form-feed escape, which would swallow the start of the
    # LaTeX "\frac" command and leave the formula reading "rac{...}".
    # Blank lines put the heading, the display equation and the list into
    # separate markdown blocks.
    mo.md(r"""
    # Bayes' Theorem: Updating Beliefs with Evidence

    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.

    The formula is:

    $$P(A|B) = \frac{P(B|A)P(A)}{P(B)}$$

    Where:

    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
    - $P(B)$ is the **evidence**: the overall probability of observing B
    """)
    return
def _(mo):
    # Scenario cell: describes the rare-disease screening example (prevalence,
    # true positive rate, false positive rate) and poses the question.
    #
    # Blank lines separate the paragraphs from the list so the closing
    # question is not absorbed into the last bullet, and the two accuracy
    # figures are indented so they nest under "Test accuracy:".
    mo.md("""
    ## Medical Testing Example

    Let's say we're testing for a rare disease:

    - Prevalence (prior probability): 0.1% of the population has the disease
    - Test accuracy:
        - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
        - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)

    What's the probability that someone who tests positive actually has the disease?
    """)
    return
def _(mo):
    # Worked example: computes P(Disease | Test+) for the screening scenario
    # and renders the substitution step by step as markdown.

    # Define the parameters
    prior_disease = 0.001  # P(Disease)
    sensitivity = 0.99  # P(Test+ | Disease)
    false_positive_rate = 0.05  # P(Test+ | No Disease)

    # Evidence term P(Test+): positives among the diseased plus positives
    # among the non-diseased.
    p_test_positive = sensitivity * prior_disease + false_positive_rate * (1 - prior_disease)

    # Apply Bayes' Theorem
    posterior_disease = (sensitivity * prior_disease) / p_test_positive

    # Display results. Every percentage uses an explicit format spec so that
    # binary floating-point noise (e.g. 0.07 * 100 == 7.000000000000001) can
    # never leak into the text; for the values above the output is unchanged.
    # Blank lines keep the list and each derivation step in separate blocks.
    mo.md(f"""
    Given:

    - Prior probability of disease: {prior_disease * 100:.1f}%
    - Sensitivity (true positive rate): {sensitivity * 100:.1f}%
    - False positive rate: {false_positive_rate * 100:.1f}%

    Using Bayes' theorem:

    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)

    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)

    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}

    P(Test+) = {p_test_positive:.4f}

    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}

    **Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
    """)
    return
def _(Rectangle, mo, plt):
    # Schematic of the medical-testing example for a notional population:
    # a population box split into diseased / non-diseased bands, plus boxes
    # for true positives, false positives and all positive tests.

    # Scenario parameters, stated once so the drawn counts and the label text
    # cannot drift apart. The leading underscore keeps them private to this
    # cell, so they do not clash with the unprefixed `sensitivity` /
    # `false_positive_rate` defined by the calculation cell.
    _population = 1000
    _prevalence = 0.001  # P(Disease)
    _sensitivity = 0.99  # P(Test+ | Disease)
    _false_positive_rate = 0.05  # P(Test+ | No Disease)

    # Only the Axes is used below; the Figure handle gets a throwaway name.
    _fig, ax = plt.subplots(figsize=(10, 6))

    # Set up the grid
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 8)

    # Population outline: an 8 x 6 box anchored at (1, 1).
    ax.add_patch(Rectangle((1, 1), 8, 6, fill=False, edgecolor="black", linewidth=2))
    ax.text(5, 7.2, f"Population ({_population} people)", ha="center", va="bottom")

    # Head counts for the two groups.
    disease_count = _population * _prevalence
    no_disease_count = _population - disease_count

    # Diseased band: its height is the diseased share of the 6-unit box.
    ax.add_patch(
        Rectangle((1, 1), 8, disease_count / _population * 6, facecolor="red", alpha=0.5)
    )
    ax.text(
        5,
        1.5 + disease_count / _population * 3,
        f"Disease ({disease_count:.0f} people)",
        ha="center",
        va="center",
        color="white",
        weight="bold",
    )

    # Non-diseased band, stacked directly on top of the diseased band.
    ax.add_patch(
        Rectangle(
            (1, 1 + disease_count / _population * 6),
            8,
            no_disease_count / _population * 6,
            facecolor="blue",
            alpha=0.5,
        )
    )
    ax.text(
        5,
        1.5 + disease_count / _population * 6 + no_disease_count / _population * 3,
        f"No Disease ({no_disease_count:.0f} people)",
        ha="center",
        va="center",
        color="white",
        weight="bold",
    )

    # Side labels; the percentages are formatted from the parameters above.
    ax.text(0.5, 4, f"True Positive\n({_sensitivity:.0%} of Diseased)", ha="right", va="center")
    ax.text(
        0.5,
        2,
        f"False Positive\n({_false_positive_rate:.0%} of Non-Diseased)",
        ha="right",
        va="center",
    )

    # Expected test outcomes.
    tp = disease_count * _sensitivity  # True positives
    fp = no_disease_count * _false_positive_rate  # False positives

    ax.add_patch(Rectangle((2, 3.5), 2, 1, facecolor="green", alpha=0.7))
    ax.text(3, 4, f"True Positive\n({tp:.0f})", ha="center", va="center")
    ax.add_patch(Rectangle((2, 1.5), 2, 1, facecolor="orange", alpha=0.7))
    ax.text(3, 2, f"False Positive\n({fp:.0f})", ha="center", va="center")

    # Everyone who tests positive, whether correctly or not.
    total_positives = tp + fp
    ax.add_patch(Rectangle((5, 1.5), 3, 2, facecolor="yellow", alpha=0.5))
    ax.text(6.5, 2.5, f"Total Positive\nTests ({total_positives:.0f})", ha="center", va="center")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Bayes' Theorem Visualization: Medical Testing")

    # `ax` is the Axes created by `plt.subplots` above, i.e. the one
    # `plt.gca()` would return here; passing it directly avoids relying on
    # pyplot's global "current axes" state.
    mo.ui.matplotlib(ax)
    return
def _(mo):
    # Closing cell: summarises what the example demonstrates and lists other
    # areas where the same reasoning applies.
    #
    # Blank lines separate the paragraphs from the lists so that "This same
    # logic applies to:" is not absorbed into the preceding list item, and
    # the three sub-points are indented so they nest under item 3.
    mo.md("""
    ## Why This Matters

    This example shows why Bayes' Theorem is important:

    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
    2. **95% accurate tests** can still give misleading results when the condition is rare
    3. **Bayes' Theorem forces us to think about**:
        - Our initial beliefs (prior probability)
        - How likely we are to observe evidence given our beliefs
        - How to update our beliefs in light of new evidence

    This same logic applies to:

    - Spam detection
    - Financial risk assessment
    - Scientific hypothesis testing
    - Machine learning classification
    """)
    return
def _():
    # Empty cell: takes no inputs, runs no code and defines nothing.
    return
def _():
    # Empty trailing cell: takes no inputs, runs no code and defines nothing.
    return
# Script entry point: run the notebook app only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    app.run()