Video library and YouTube#
This page shows stats about the video library and the OLS YouTube channel.
import matplotlib.pyplot as plt
import pandas as pd
import yt_dlp
Video library#
The video library contains available videos from talks in Open Seeds cohort calls.
# Location of the Open Seeds data artifacts in the OLS website repository.
baseurl = "https://raw.githubusercontent.com/open-life-science/open-life-science.github.io/main/_data/artifacts/openseeds/"

# Load the talk library: the first CSV column is used as the index and
# missing-value detection is switched off (na_filter=False).
library_df = pd.read_csv(
    f"{baseurl}library.csv",
    na_filter=False,
    index_col=0,
)
Number of talks
len(library_df)
233
Speakers#
# Number of talks per speaker, as a one-column table named "Total" indexed
# by the value of the "speakers" column.
#
# size() counts the rows of each group directly, so the result does not
# depend on which other columns library.csv happens to contain (the previous
# version had to drop a hard-coded list of columns and broke, or leaked extra
# columns into the histogram, whenever the CSV schema changed).
speaker_df = (
    library_df
    .groupby(by="speakers")
    .size()
    .to_frame(name="Total")
)
Number of speakers
len(speaker_df)
116
Mean number of talks per speaker
speaker_df["Total"].mean()
2.0086206896551726
Median number of talks per speaker
speaker_df["Total"].median()
1.0
Distribution of the number of talks per speaker
# Histogram of the number of talks given by each speaker.
fig, ax = plt.subplots()
fig.set_dpi(300)  # high resolution for the rendered page
speaker_df.plot.hist(
    ax=ax,
    bins=25,
    color="#139D3D",
    legend=False,
)
# Label the x axis through the Axes object; this is the cell's last
# expression, so the created Text object is what the notebook displays.
ax.set_xlabel('Number of talks')
Text(0.5, 0, 'Number of talks')
YouTube stats#
All videos from Open Seeds calls are uploaded to the OLS YouTube channel.
%%capture
ydl_opts = {}
URL = "https://www.youtube.com/c/OpenLifeSci"
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(URL, download=False)
# ydl.sanitize_info makes the info json-serializable
channel_content = ydl.sanitize_info(info)
# extract video information
videos = []
for v in channel_content['entries'][0]['entries']:
videos.append({key:v[key] for key in ['title', 'duration', 'view_count']})
yt_stat_df = (
pd.DataFrame(videos)
.assign(Duration=lambda df: df.duration/60)
.drop(columns=["duration"])
.rename(columns=str.capitalize)
)
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[10], line 4
2 URL = "https://www.youtube.com/c/OpenLifeSci"
3 with yt_dlp.YoutubeDL(ydl_opts) as ydl:
----> 4 info = ydl.extract_info(URL, download=False)
5 # ydl.sanitize_info makes the info json-serializable
6 channel_content = ydl.sanitize_info(info)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1556, in YoutubeDL.extract_info(self, url, download, ie_key, extra_info, process, force_generic_extractor)
1554 raise ExistingVideoReached()
1555 break
-> 1556 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1557 else:
1558 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1567, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1565 while True:
1566 try:
-> 1567 return func(self, *args, **kwargs)
1568 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1569 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1723, in YoutubeDL.__extract_info(self, url, ie, download, extra_info, process)
1721 if process:
1722 self._wait_for_video(ie_result)
-> 1723 return self.process_ie_result(ie_result, download, extra_info)
1724 else:
1725 return ie_result
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1852, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1850 self._sanitize_thumbnails(ie_result)
1851 try:
-> 1852 return self.__process_playlist(ie_result, download)
1853 finally:
1854 self._playlist_level -= 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1996, in YoutubeDL.__process_playlist(self, ie_result, download)
1991 continue
1993 self.to_screen('[download] Downloading item %s of %s' % (
1994 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
-> 1996 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1997 'playlist_index': playlist_index,
1998 'playlist_autonumber': i + 1,
1999 }, extra))
2000 if not entry_result:
2001 failures += 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1567, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1565 while True:
1566 try:
-> 1567 return func(self, *args, **kwargs)
1568 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1569 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:2028, in YoutubeDL.__process_iterable_entry(self, entry, download, extra_info)
2026 @_handle_extraction_exceptions
2027 def __process_iterable_entry(self, entry, download, extra_info):
-> 2028 return self.process_ie_result(
2029 entry, download=download, extra_info=extra_info)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1852, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1850 self._sanitize_thumbnails(ie_result)
1851 try:
-> 1852 return self.__process_playlist(ie_result, download)
1853 finally:
1854 self._playlist_level -= 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1996, in YoutubeDL.__process_playlist(self, ie_result, download)
1991 continue
1993 self.to_screen('[download] Downloading item %s of %s' % (
1994 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
-> 1996 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1997 'playlist_index': playlist_index,
1998 'playlist_autonumber': i + 1,
1999 }, extra))
2000 if not entry_result:
2001 failures += 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1567, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1565 while True:
1566 try:
-> 1567 return func(self, *args, **kwargs)
1568 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1569 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:2028, in YoutubeDL.__process_iterable_entry(self, entry, download, extra_info)
2026 @_handle_extraction_exceptions
2027 def __process_iterable_entry(self, entry, download, extra_info):
-> 2028 return self.process_ie_result(
2029 entry, download=download, extra_info=extra_info)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1802, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1798 return ie_result
1799 elif result_type == 'url':
1800 # We have to add extra_info to the results because it may be
1801 # contained in a playlist
-> 1802 return self.extract_info(
1803 ie_result['url'], download,
1804 ie_key=ie_result.get('ie_key'),
1805 extra_info=extra_info)
1806 elif result_type == 'url_transparent':
1807 # Use the information from the embedding page
1808 info = self.extract_info(
1809 ie_result['url'], ie_key=ie_result.get('ie_key'),
1810 extra_info=extra_info, download=False, process=False)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1556, in YoutubeDL.extract_info(self, url, download, ie_key, extra_info, process, force_generic_extractor)
1554 raise ExistingVideoReached()
1555 break
-> 1556 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1557 else:
1558 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1567, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1565 while True:
1566 try:
-> 1567 return func(self, *args, **kwargs)
1568 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1569 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:1702, in YoutubeDL.__extract_info(self, url, ie, download, extra_info, process)
1699 self._apply_header_cookies(url)
1701 try:
-> 1702 ie_result = ie.extract(url)
1703 except UserNotLive as e:
1704 if process:
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/common.py:715, in InfoExtractor.extract(self, url)
712 self.initialize()
713 self.to_screen('Extracting URL: %s' % (
714 url if self.get_param('verbose') else truncate_string(url, 100, 20)))
--> 715 ie_result = self._real_extract(url)
716 if ie_result is None:
717 return None
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/youtube.py:4073, in YoutubeIE._real_extract(self, url)
4070 base_url = self.http_scheme() + '//www.youtube.com/'
4071 webpage_url = base_url + 'watch?v=' + video_id
-> 4073 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4075 playability_statuses = traverse_obj(
4076 player_responses, (..., 'playabilityStatus'), expected_type=dict)
4078 trailer_video_id = get_first(
4079 playability_statuses,
4080 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4081 expected_type=str)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/youtube.py:4032, in YoutubeIE._download_player_responses(self, url, smuggled_data, video_id, webpage_url)
4030 if pp:
4031 query['pp'] = pp
-> 4032 webpage = self._download_webpage(
4033 webpage_url, video_id, fatal=False, query=query)
4035 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
4037 player_responses, player_url = self._extract_player_responses(
4038 self._get_requested_clients(url, smuggled_data),
4039 video_id, webpage, master_ytcfg, smuggled_data)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/common.py:1118, in InfoExtractor._download_webpage(self, url_or_request, video_id, note, errnote, fatal, tries, timeout, *args, **kwargs)
1116 while True:
1117 try:
-> 1118 return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
1119 except IncompleteRead as e:
1120 try_count += 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/common.py:1069, in InfoExtractor.__create_download_methods.<locals>.download_content(self, url_or_request, video_id, note, errnote, transform_source, fatal, encoding, data, headers, query, expected_status)
1067 kwargs.pop('transform_source')
1068 # The method is fetched by name so subclasses can override _download_..._handle
-> 1069 res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
1070 return res if res is False else res[0]
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/common.py:903, in InfoExtractor._download_webpage_handle(self, url_or_request, video_id, note, errnote, fatal, encoding, data, headers, query, expected_status)
900 if isinstance(url_or_request, str):
901 url_or_request = url_or_request.partition('#')[0]
--> 903 urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
904 if urlh is False:
905 assert not fatal
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/extractor/common.py:847, in InfoExtractor._request_webpage(self, url_or_request, video_id, note, errnote, fatal, data, headers, query, expected_status)
844 headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
846 try:
--> 847 return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
848 except network_exceptions as err:
849 if isinstance(err, HTTPError):
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/YoutubeDL.py:4052, in YoutubeDL.urlopen(self, req)
4049 clean_headers(req.headers)
4051 try:
-> 4052 return self._request_director.send(req)
4053 except NoSupportingHandlers as e:
4054 for ue in e.unsupported_errors:
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/networking/common.py:114, in RequestDirector.send(self, request)
112 self._print_verbose(f'Sending request via "{handler.RH_NAME}"')
113 try:
--> 114 response = handler.send(request)
115 except RequestError:
116 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/networking/_helper.py:204, in wrap_request_errors.<locals>.wrapper(self, *args, **kwargs)
201 @functools.wraps(func)
202 def wrapper(self, *args, **kwargs):
203 try:
--> 204 return func(self, *args, **kwargs)
205 except UnsupportedRequest as e:
206 if e.handler is None:
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/networking/common.py:325, in RequestHandler.send(self, request)
323 if not isinstance(request, Request):
324 raise TypeError('Expected an instance of Request')
--> 325 return self._send(request)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/yt_dlp/networking/_requests.py:304, in RequestsRH._send(self, request)
300 session = self._get_instance(
301 cookiejar=request.extensions.get('cookiejar') or self.cookiejar)
303 try:
--> 304 requests_res = session.request(
305 method=request.method,
306 url=request.url,
307 data=request.data,
308 headers=headers,
309 timeout=float(request.extensions.get('timeout') or self.timeout),
310 proxies=request.proxies or self.proxies,
311 allow_redirects=True,
312 stream=True
313 )
315 except requests.exceptions.TooManyRedirects as e:
316 max_redirects_exceeded = True
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
483 timeout = TimeoutSauce(connect=timeout, read=timeout)
485 try:
--> 486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
489 body=request.body,
490 headers=request.headers,
491 redirect=False,
492 assert_same_host=False,
493 preload_content=False,
494 decode_content=False,
495 retries=self.max_retries,
496 timeout=timeout,
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
501 raise ConnectionError(err, request=request)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/urllib3/connectionpool.py:790, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
787 response_conn = conn if not release_conn else None
789 # Make the request on the HTTPConnection object
--> 790 response = self._make_request(
791 conn,
792 method,
793 url,
794 timeout=timeout_obj,
795 body=body,
796 headers=headers,
797 chunked=chunked,
798 retries=retries,
799 response_conn=response_conn,
800 preload_content=preload_content,
801 decode_content=decode_content,
802 **response_kw,
803 )
805 # Everything went great!
806 clean_exit = True
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/urllib3/connectionpool.py:536, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
534 # Receive the response from the server
535 try:
--> 536 response = conn.getresponse()
537 except (BaseSSLError, OSError) as e:
538 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/site-packages/urllib3/connection.py:461, in HTTPConnection.getresponse(self)
458 from .response import HTTPResponse
460 # Get the response from http.client.HTTPConnection
--> 461 httplib_response = super().getresponse()
463 try:
464 assert_header_parsing(httplib_response.msg)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/http/client.py:1386, in HTTPConnection.getresponse(self)
1384 try:
1385 try:
-> 1386 response.begin()
1387 except ConnectionError:
1388 self.close()
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/http/client.py:325, in HTTPResponse.begin(self)
323 # read until we get a non-100 response
324 while True:
--> 325 version, status, reason = self._read_status()
326 if status != CONTINUE:
327 break
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/http/client.py:286, in HTTPResponse._read_status(self)
285 def _read_status(self):
--> 286 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
287 if len(line) > _MAXLINE:
288 raise LineTooLong("status line")
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/socket.py:706, in SocketIO.readinto(self, b)
704 while True:
705 try:
--> 706 return self._sock.recv_into(b)
707 except timeout:
708 self._timeout_occurred = True
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/ssl.py:1315, in SSLSocket.recv_into(self, buffer, nbytes, flags)
1311 if flags != 0:
1312 raise ValueError(
1313 "non-zero flags not allowed in calls to recv_into() on %s" %
1314 self.__class__)
-> 1315 return self.read(nbytes, buffer)
1316 else:
1317 return super().recv_into(buffer, nbytes, flags)
File /usr/share/miniconda/envs/ols-stats/lib/python3.11/ssl.py:1167, in SSLSocket.read(self, len, buffer)
1165 try:
1166 if buffer is not None:
-> 1167 return self._sslobj.read(len, buffer)
1168 else:
1169 return self._sslobj.read(len)
KeyboardInterrupt:
Number of videos on the YouTube channel
len(yt_stat_df)
158
Number of videos of the Open Seeds calls
# Keep the videos whose title contains the literal text "OLS-".
# A plain boolean mask is used instead of a string-evaluated query:
# regex=False matches the text literally and na=False treats a missing
# title as "no match" instead of producing an unusable mask.
openseeds_yt_df = yt_stat_df[
    yt_stat_df["Title"].str.contains("OLS-", regex=False, na=False)
]
len(openseeds_yt_df)
74
Duration#
Number of hours of Open Seeds videos on the YouTube channel
# Duration is stored in minutes; divide the total by 60 to get hours.
# Series.sum() is vectorised and skips missing durations, consistent with
# the .mean() used for the average duration.
openseeds_yt_df.Duration.sum() / 60
78.0469444444444
Mean duration (minutes) of Open Seeds videos
openseeds_yt_df.Duration.mean()
63.28130630630629
Longest and shortest Open Seeds videos
openseeds_yt_df.sort_values("Duration", ascending=False)
Title | View_count | Duration | |
---|---|---|---|
65 | OLS-4 Week 06 - Open Science I: Project Develo... | 67 | 105.883333 |
125 | OLS-3 graduation session 3 | 81 | 103.933333 |
4 | OLS-8: Open Leadership in Practice | 45 | 93.233333 |
2 | OLS-8 - Week 10: Open Science Garden II | 42 | 92.083333 |
52 | OLS-5 Week 06:Open science I: Project Developm... | 92 | 86.066667 |
... | ... | ... | ... |
31 | OLS-6: Week 13 - Personal Ecology & Social hour | 33 | 24.183333 |
60 | OLS-4 Week 13 - Self Care and Personal Ecology | 38 | 14.866667 |
47 | OLS-5 Week 13 - Self-care & Social call [Skill... | 42 | 12.816667 |
69 | Creating an issue on OLS-4 repository | 79 | 2.766667 |
34 | Call opens for application to the OLS-7 traini... | 30 | 0.450000 |
74 rows × 3 columns
Views#
Total number of views of the Open Seeds videos on the YouTube channel
# Total views over all Open Seeds videos. Series.sum() is vectorised and
# skips missing view counts, consistent with the .mean() used for the average.
openseeds_yt_df.View_count.sum()
6684
Mean number of views per Open Seeds video
openseeds_yt_df.View_count.mean()
90.32432432432432
Most and least viewed Open Seeds videos
openseeds_yt_df.sort_values("View_count", ascending=False)
Title | View_count | Duration | |
---|---|---|---|
136 | OLS-3 Application Launch Webinar and Q&A - J... | 834 | 38.100000 |
32 | OLS-7 cohort launch application webinar | 382 | 34.650000 |
135 | OLS-3 Week 2 Cohort call 1: Welcome to Open Li... | 229 | 63.850000 |
140 | [OLS-2] Cohort call 6 - week 10 - Open Science... | 186 | 69.916667 |
154 | OLS-1 - Week 2 - Cohort Call 1 - Welcome to Op... | 171 | 56.616667 |
... | ... | ... | ... |
34 | Call opens for application to the OLS-7 traini... | 30 | 0.450000 |
64 | OLS-4 Week 08 - Community design for inclusivity | 28 | 68.900000 |
48 | OLS-5 Week 12 - Designing & Empowering for inc... | 24 | 59.416667 |
61 | OLS-4 Week 12 - Diversity and Inclusion & Ally... | 22 | 54.933333 |
10 | Open Seeds OLS-7 Graduation - Group 1 (Multist... | 18 | 84.916667 |
74 rows × 3 columns