Pairs bootstrap and correlation — PoL workshop on statistical inference, part 1 documentation (2024)

Dataset download

[2]:
import numpy as npimport polars as plimport numbaimport bebi103import bokehbokeh.io.output_notebook()

Loading BokehJS ...

"}}; function display_loaded(error = null) { const el = document.getElementById("a2a7efa2-9d5b-4385-90cc-56f72bdcbd86"); if (el != null) { const html = (() => { if (typeof root.Bokeh === "undefined") { if (error == null) { return "BokehJS is loading ..."; } else { return "BokehJS failed to load."; } } else { const prefix = `BokehJS ${root.Bokeh.version}`; if (error == null) { return `${prefix} successfully loaded.`; } else { return `${prefix} encountered errors while loading and may not function as expected.`; } } })(); el.innerHTML = html; if (error != null) { const wrapper = document.createElement("div"); wrapper.style.overflow = "auto"; wrapper.style.height = "5em"; wrapper.style.resize = "vertical"; const content = document.createElement("div"); content.style.fontFamily = "monospace"; content.style.whiteSpace = "pre-wrap"; content.style.backgroundColor = "rgb(255, 221, 221)"; content.textContent = error.stack ?? error.toString(); wrapper.append(content); el.append(wrapper); } } else if (Date.now() < root._bokeh_timeout) { setTimeout(() => display_loaded(error), 100); } } function run_callbacks() { try { root._bokeh_onload_callbacks.forEach(function(callback) { if (callback != null) callback(); }); } finally { delete root._bokeh_onload_callbacks } console.debug("Bokeh: all callbacks have finished"); } function load_libs(css_urls, js_urls, callback) { if (css_urls == null) css_urls = []; if (js_urls == null) js_urls = []; root._bokeh_onload_callbacks.push(callback); if (root._bokeh_is_loading > 0) { console.debug("Bokeh: BokehJS is being loaded, scheduling callback at", now()); return null; } if (js_urls == null || js_urls.length === 0) { run_callbacks(); return null; } console.debug("Bokeh: BokehJS not loaded, scheduling load and callback at", now()); root._bokeh_is_loading = css_urls.length + js_urls.length; function on_load() { root._bokeh_is_loading--; if (root._bokeh_is_loading === 0) { console.debug("Bokeh: all BokehJS libraries/stylesheets loaded"); run_callbacks() } } function on_error(url) { console.error("failed to load " + url); } for (let i = 0; i < css_urls.length; i++) { const url = css_urls[i]; const element = document.createElement("link"); element.onload = on_load; element.onerror = on_error.bind(null, url); element.rel = "stylesheet"; element.type = "text/css"; element.href = url; console.debug("Bokeh: injecting link tag for BokehJS stylesheet: ", url); document.body.appendChild(element); } for (let i = 0; i < js_urls.length; i++) { const url = js_urls[i]; const element = document.createElement('script'); element.onload = on_load; element.onerror = on_error.bind(null, url); element.async = false; element.src = url; console.debug("Bokeh: injecting script tag for BokehJS library: ", url); document.head.appendChild(element); } }; function inject_raw_css(css) { const element = document.createElement("style"); element.appendChild(document.createTextNode(css)); document.body.appendChild(element); } const js_urls = ["https://cdn.bokeh.org/bokeh/release/bokeh-3.4.1.min.js", "https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.1.min.js", "https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.1.min.js", "https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.1.min.js", "https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.4.1.min.js", "https://unpkg.com/@holoviz/panel@1.4.4/dist/panel.min.js"]; const css_urls = []; const inline_js = [ function(Bokeh) { Bokeh.set_log_level("info"); },function(Bokeh) { } ]; function run_inline_js() { if (root.Bokeh !== undefined || force === true) { try { for (let i = 0; i < inline_js.length; i++) { inline_js[i].call(root, root.Bokeh); } } catch (error) {display_loaded(error);throw error; }if (force === true) { display_loaded(); }} else if (Date.now() < root._bokeh_timeout) { setTimeout(run_inline_js, 100); } else if (!root._bokeh_failed_load) { console.log("Bokeh: BokehJS failed to load within specified timeout."); root._bokeh_failed_load = true; } else if (force !== true) { const cell = $(document.getElementById("a2a7efa2-9d5b-4385-90cc-56f72bdcbd86")).parents('.cell').data().cell; cell.output_area.append_execute_result(NB_LOAD_WARNING) } } if (root._bokeh_is_loading === 0) { console.debug("Bokeh: BokehJS loaded, going straight to plotting"); run_inline_js(); } else { load_libs(css_urls, js_urls, function() { console.debug("Bokeh: BokehJS plotting callback run at", now()); run_inline_js(); }); }}(window));

We continue our analysis of the drone sperm quality data set. Let’s load it and remind ourselves of the content.

[3]:
df = pl.read_csv( os.path.join(data_path, "bee_sperm.csv"), null_values=["NO SPERM", "No Sperm"], comment_prefix="#",)df.head()
[3]:

shape: (5, 18)

SpecimenTreatmentEnvironmentTreatmentNCSSSample IDColonyCageSampleSperm Volume per 500 ulQuantityViabilityRaw (%)QualityAge (d)InfertilAliveSpermQuantity MillionsAlive Sperm MillionsDead Sperm Millions
i64strstri64stri64i64i64i64i64f64f64i64i64i64f64f64f64
227"Control""Cage"1"C2-1-1"2112150000215000096.72638196.72638114020796172.152.0796170.070383
228"Control""Cage"1"C2-1-2"2122287500228750096.34980896.34980814022040012.28752.2040010.083499
229"Control""Cage"1"C2-1-3"213875008750098.7598.75140864060.08750.0864060.001094
230"Control""Cage"1"C2-1-4"2141875000187500093.28742193.28742114017491391.8751.7491390.125861
231"Control""Cage"1"C2-1-5"2151587500158750097.79250697.79250614015524561.58751.5524560.035044

Correlation

We might wish to investigate how two measured quantities are correlated. For example, if the number of dead sperm and the number of alive sperm are closely correlated, this would mean that a given drone produces some quantity of sperm and some fraction tend to be dead. Let’s take a look at this.

[4]:
# Set up plot on log scalep = bokeh.plotting.figure( frame_height=300, frame_width=300, x_axis_label="alive sperm (millions)", y_axis_label="dead sperm (millions)", x_axis_type="log", y_axis_type="log",)# Only use values greater than zero for log scaleinds = (pl.col("Alive Sperm Millions") > 0) & (pl.col("Dead Sperm Millions") > 0)# Populate glyphsfor color, ((treatment,), sub_df) in zip( bokeh.palettes.Category10_3, df.filter(inds).group_by("Treatment")): p.scatter( source=sub_df.to_dict(), x="Alive Sperm Millions", y="Dead Sperm Millions", color=color, legend_label=treatment, )p.legend.location = "bottom_right"bokeh.io.show(p)

There seems to be some correlation (on a log scale), but it is difficult to tell. We can compute the correlation with the bivariate correlation coefficient, also known as the Pearson correlation. It is the plug-in estimate of the correlation between variables (in this case alive and dead sperm). The correlation is the covariance divided by the geometric mean of the individual variances

The bivariate correlation coefficient is implemented with np.corrcoef(), but we will code our own and JIT it for speed.

[5]:
@numba.njitdef bivariate_r(x, y): """ Compute plug-in estimate for the bivariate correlation coefficient. """ return ( np.sum((x - np.mean(x)) * (y - np.mean(y))) / np.std(x) / np.std(y) / np.sqrt(len(x)) / np.sqrt(len(y)) )

We can use it to compute the bivariate correlation coefficient for the logarithm of alive and dead sperm.

[6]:
bivariate_r( np.log(df.filter(inds).get_column("Alive Sperm Millions").to_numpy()), np.log(df.filter(inds).get_column("Dead Sperm Millions").to_numpy()),)
[6]:
0.5219944217488051

Pairs bootstrap confidence intervals

How can we get a confidence interval on a correlation coefficient? We can again apply the bootstrap, but this time, the replicate is a pair of data, in this case a dead sperm count/alive sperm count pair. The process of drawing pairs of data points from an experiment and then computing bootstrap replicates from them is called pairs bootstrap. Let’s code it up for this example with the bivariate correlation.

Our strategy in coding up the pairs bootstrap is to draw bootstrap samples of the indices of measurement and use those indices to select the pairs.

[7]:
@numba.njitdef draw_bs_sample(data): """Draw a bootstrap sample from a 1D data set.""" return np.random.choice(data, size=len(data))@numba.njitdef draw_bs_pairs(x, y): """Draw a pairs bootstrap sample.""" inds = np.arange(len(x)) bs_inds = draw_bs_sample(inds) return x[bs_inds], y[bs_inds]

With our pairs sampling function in place, we can write a function to compute replicates.

[8]:
@numba.njit(parallel=True)def draw_bs_pairs_reps_bivariate(x, y, size=1): """ Draw bootstrap pairs replicates. """ out = np.empty(size) for i in numba.prange(size): out[i] = bivariate_r(*draw_bs_pairs(x, y)) return out

Finally, we can put it all together to compute confidence intervals on the correlation. To start, we extract all of the relevant measurements as Numpy arrays to allow for faster resampling (and that’s what our Numba’d functions require).

[9]:
# Extract NumPy arrays (only use values greater than zero for logs)inds = (pl.col("Alive Sperm Millions") > 0) & (pl.col("Dead Sperm Millions") > 0)dfs = df.filter(inds).partition_by('Treatment', as_dict=True)alive_ctrl = dfs[('Control',)].get_column("Alive Sperm Millions").to_numpy()alive_pest = dfs[('Pesticide',)].get_column("Alive Sperm Millions").to_numpy()dead_ctrl = dfs[('Control',)].get_column("Dead Sperm Millions").to_numpy()dead_pest = dfs[('Pesticide',)].get_column("Dead Sperm Millions").to_numpy()

Now we can compute the bootstrap replicates using our draw_bs_pairs_reps_bivariate() function.

[10]:
# Get repsbs_reps_ctrl = draw_bs_pairs_reps_bivariate( np.log(alive_ctrl), np.log(dead_ctrl), size=10_000)bs_reps_pest = draw_bs_pairs_reps_bivariate( np.log(alive_pest), np.log(dead_pest), size=10_000)

And from the replicates, we can compute and print the 95% confidence interval.

[11]:
# Get the confidence intervalsconf_int_ctrl = np.percentile(bs_reps_ctrl, [2.5, 97.5])conf_int_pest = np.percentile(bs_reps_pest, [2.5, 97.5])# Plot confidence intervalssummaries = [ dict( label="control", estimate=bivariate_r(np.log(alive_ctrl), np.log(dead_ctrl)), conf_int=conf_int_ctrl, ), dict( label="treated", estimate=bivariate_r(np.log(alive_pest), np.log(dead_pest)), conf_int=conf_int_pest, ),]bokeh.io.show( bebi103.viz.confints(summaries, x_axis_label="bivariate correlation of logs"))

We see a clear correlation in both samples, with a wide, but positive, confidence interval. Note that we did this analysis on a log scale, since the data span several orders of magnitude.

Computing environment

[12]:
%load_ext watermark%watermark -v -p numpy,polars,numba,bokeh,bebi103,jupyterlab
Python implementation: CPythonPython version : 3.12.4IPython version : 8.25.0numpy : 1.26.4polars : 1.1.0numba : 0.59.1bokeh : 3.4.1bebi103 : 0.1.21jupyterlab: 4.0.13
Pairs bootstrap and correlation — PoL workshop on statistical inference, part 1  documentation (2024)

References

Top Articles
Drive Mad Yandex
San Francisco, CA Real Estate & Homes for Sale | realtor.com®
Kostner Wingback Bed
Devin Mansen Obituary
Christian McCaffrey loses fumble to open Super Bowl LVIII
His Lost Lycan Luna Chapter 5
Practical Magic 123Movies
Jeremy Corbell Twitter
Terraria Enchanting
GAY (and stinky) DOGS [scat] by Entomb
Stolen Touches Neva Altaj Read Online Free
Atrium Shift Select
Spelunking The Den Wow
Springfield Mo Craiglist
Mini Handy 2024: Die besten Mini Smartphones | Purdroid.de
Nashville Predators Wiki
Pizza Hut In Dinuba
Lista trofeów | Jedi Upadły Zakon / Fallen Order - Star Wars Jedi Fallen Order - poradnik do gry | GRYOnline.pl
Puretalkusa.com/Amac
Dcf Training Number
Xfinity Outage Map Fredericksburg Va
Kingdom Tattoo Ithaca Mi
Www.craigslist.com Austin Tx
Haunted Mansion Showtimes Near Epic Theatres Of West Volusia
Wiseloan Login
D2L Brightspace Clc
Evil Dead Rise Ending Explained
Weather October 15
N.J. Hogenkamp Sons Funeral Home | Saint Henry, Ohio
134 Paige St. Owego Ny
Indiana Jones 5 Showtimes Near Jamaica Multiplex Cinemas
123Moviestvme
Bee And Willow Bar Cart
Craigslist Neworleans
10 Most Ridiculously Expensive Haircuts Of All Time in 2024 - Financesonline.com
Junee Warehouse | Imamother
About Us | SEIL
Caderno 2 Aulas Medicina - Matemática
Vision Source: Premier Network of Independent Optometrists
„Wir sind gut positioniert“
Stewartville Star Obituaries
Isabella Duan Ahn Stanford
Sarahbustani Boobs
Pink Runtz Strain, The Ultimate Guide
Brake Pads - The Best Front and Rear Brake Pads for Cars, Trucks & SUVs | AutoZone
Sechrest Davis Funeral Home High Point Nc
Petfinder Quiz
Rocket League Tracker: A useful tool for every player
Ephesians 4 Niv
Mit diesen geheimen Codes verständigen sich Crew-Mitglieder
Lake County Fl Trash Pickup Schedule
Craigslist Centre Alabama
Latest Posts
Article information

Author: Rubie Ullrich

Last Updated:

Views: 5634

Rating: 4.1 / 5 (52 voted)

Reviews: 83% of readers found this page helpful

Author information

Name: Rubie Ullrich

Birthday: 1998-02-02

Address: 743 Stoltenberg Center, Genovevaville, NJ 59925-3119

Phone: +2202978377583

Job: Administration Engineer

Hobby: Surfing, Sailing, Listening to music, Web surfing, Kitesurfing, Geocaching, Backpacking

Introduction: My name is Rubie Ullrich, I am a enthusiastic, perfect, tender, vivacious, talented, famous, delightful person who loves writing and wants to share my knowledge and understanding with you.