more stats

4bef0944 · Loïc Lehnhoff · 4315160d · 4bef0944 · 4bef0944 · 4bef0944
Commit 4bef0944 authored 4 months ago by Loïc Lehnhoff
--- a/Annex_WCT_analysis.ipynb
+++ b/Annex_WCT_analysis.ipynb
--- a/WCT_analysis.ipynb
+++ b/WCT_analysis.ipynb
--- a/WCT_analysis_utils.py
+++ b/WCT_analysis_utils.py
@@ -747,7 +747,7 @@ def mandelbrot_law_fit(df):
 def vertical_proportion_plot(
    df, xcol, hue, xorder,
    legend_title="", xlabel="", ylabel="", maintitle="",
-    palette=["#648fff", "#dc267f", "#ffb000"]
+    palette=["#648fff", "#dc267f", "#ffb000"], show_sizes=True
    ):
    if len(xorder)>0:
@@ -769,6 +769,7 @@ def vertical_proportion_plot(
    # Add percentage labels to segments
    for i, state in enumerate(xorder):
+        if show_sizes:
            axs.text(i, 1, f"(N={xvalue_sizes[i]})", 
                ha='center', va='bottom', color='black')
@@ -809,7 +810,7 @@ def vertical_proportion_plot(
 def horizontal_proportion_plot(
    df, ycol, hue, yorder,
    legend_title="", xlabel="", ylabel="", maintitle="",
-    palette=["#648fff", "#dc267f", "#ffb000"]
+    palette=["#648fff", "#dc267f", "#ffb000"], show_sizes=True
    ):
    if len(yorder)>0:
@@ -831,6 +832,7 @@ def horizontal_proportion_plot(
    # Add percentage labels to segments
    for i, state in enumerate(yorder):
+        if show_sizes:
            axs.text(-0.025, i+0.025, f"(N={yvalue_sizes[i]})", 
                ha='right', va='top', color='black')
@@ -869,7 +871,7 @@ def horizontal_proportion_plot(
    fig.subplots_adjust(top=0.83)
    return fig, axs
-def fisher_tests(df, feature_col, feature_of_interest, group_col, alpha=0.05):
+def fisher_tests(df, feature_col, feature_of_interest, group_col, alpha=0.05, get_values=False):
    """
    Create a table showing proportions with letter annotations for significance groups.
@@ -900,6 +902,9 @@ def fisher_tests(df, feature_col, feature_of_interest, group_col, alpha=0.05):
    n_groups = len(groups)
    # Matrix to store p-values between each pair of groups
+    if get_values:
+        p_values = pd.DataFrame(columns=["mod_1", "mod_2", "reject_H0", "p_value", "odds_ratio"])
+    else:
        p_values = pd.DataFrame(columns=["mod_1", "mod_2", "reject_H0"])
    # Compute p-values for all pairs
@@ -915,8 +920,11 @@ def fisher_tests(df, feature_col, feature_of_interest, group_col, alpha=0.05):
        # Run Fisher's exact test
        table = np.array([[sig1, non_sig1], [sig2, non_sig2]])
-        _, p_value = stats.fisher_exact(table)
+        odds_ratio, p_value = stats.fisher_exact(table)
+        if get_values:
+            p_values.loc[len(p_values)] = [g1, g2, p_value < alpha, p_value, odds_ratio]
+        else:
            p_values.loc[len(p_values)] = [g1, g2, p_value < alpha]
    return p_values