aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christian Cleberg <hello@cleberg.net> 2024-01-09 21:03:20 -0600
committer Christian Cleberg <hello@cleberg.net> 2024-01-09 21:03:20 -0600
commit ce4abbd687e6b5500589d81ef506406142e6c6cd (patch)
tree e19d37a213c69f9d1df7d2f227d69a29e1f71ed1
parent b5590008a2a8a1dba0defca9f6f3bb0377c6365b (diff)
download data-science-ce4abbd687e6b5500589d81ef506406142e6c6cd.tar.gz
data-science-ce4abbd687e6b5500589d81ef506406142e6c6cd.tar.bz2
data-science-ce4abbd687e6b5500589d81ef506406142e6c6cd.zip
feat: add polars notebook
-rw-r--r-- notebooks/polars.ipynb 428
1 files changed, 428 insertions, 0 deletions
diff --git a/notebooks/polars.ipynb b/notebooks/polars.ipynb
new file mode 100644
index 0000000..1470c0a
--- /dev/null
+++ b/notebooks/polars.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Polars\n",
+ "\n",
+ "Testing out [Polars](https://pola.rs/)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Defaulting to user installation because normal site-packages is not writeable\n",
+ "Requirement already satisfied: polars[all] in /Users/cmc/Library/Python/3.9/lib/python/site-packages (0.20.3)\n",
+ "Collecting connectorx>=0.3.2\n",
+ " Downloading connectorx-0.3.2-cp39-cp39-macosx_11_0_arm64.whl (43.7 MB)\n",
+ "\u001b[K |████████████████████████████████| 43.7 MB 709 kB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting pydantic\n",
+ " Downloading pydantic-2.5.3-py3-none-any.whl (381 kB)\n",
+ "\u001b[K |████████████████████████████████| 381 kB 31.7 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting pandas\n",
+ " Downloading pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl (11.0 MB)\n",
+ "\u001b[K |████████████████████████████████| 11.0 MB 46.7 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting sqlalchemy\n",
+ " Downloading SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl (2.1 MB)\n",
+ "\u001b[K |████████████████████████████████| 2.1 MB 8.0 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting pyiceberg>=0.5.0\n",
+ " Downloading pyiceberg-0.5.1.tar.gz (418 kB)\n",
+ "\u001b[K |████████████████████████████████| 418 kB 14.7 MB/s eta 0:00:01\n",
+ "\u001b[?25h\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/normal/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/normal/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Additional context:\n",
+ " user = False\n",
+ " home = None\n",
+ " root = None\n",
+ " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/normal'\u001b[0m\n",
+ "\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/overlay/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/overlay/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Additional context:\n",
+ " user = False\n",
+ " home = None\n",
+ " root = None\n",
+ " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/overlay'\u001b[0m\n",
+ " Installing build dependencies ... \u001b[?25ldone\n",
+ "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
+ "\u001b[?25h Preparing wheel metadata ... \u001b[?25ldone\n",
+ "\u001b[?25hCollecting gevent\n",
+ " Downloading gevent-23.9.1-cp39-cp39-macosx_11_0_universal2.whl (2.9 MB)\n",
+ "\u001b[K |████████████████████████████████| 2.9 MB 27.4 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: numpy>=1.16.0 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from polars[all]) (1.26.3)\n",
+ "Collecting cloudpickle\n",
+ " Downloading cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n",
+ "Collecting adbc_driver_sqlite\n",
+ " Downloading adbc_driver_sqlite-0.9.0-py3-none-macosx_11_0_arm64.whl (696 kB)\n",
+ "\u001b[K |████████████████████████████████| 696 kB 27.0 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting xlsxwriter\n",
+ " Downloading XlsxWriter-3.1.9-py3-none-any.whl (154 kB)\n",
+ "\u001b[K |████████████████████████████████| 154 kB 11.5 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting xlsx2csv>=0.8.0\n",
+ " Downloading xlsx2csv-0.8.2.tar.gz (227 kB)\n",
+ "\u001b[K |████████████████████████████████| 227 kB 42.7 MB/s eta 0:00:01\n",
+ "\u001b[?25h\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/normal/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/normal/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Additional context:\n",
+ " user = False\n",
+ " home = None\n",
+ " root = None\n",
+ " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/normal'\u001b[0m\n",
+ "\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/overlay/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n",
+ " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/overlay/lib/python3.9/site-packages\n",
+ " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n",
+ "\u001b[33m WARNING: Additional context:\n",
+ " user = False\n",
+ " home = None\n",
+ " root = None\n",
+ " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/overlay'\u001b[0m\n",
+ " Installing build dependencies ... \u001b[?25ldone\n",
+ "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
+ "\u001b[?25h Preparing wheel metadata ... \u001b[?25ldone\n",
+ "\u001b[?25h\u001b[33m WARNING: Generating metadata for package xlsx2csv produced metadata for project name unknown. Fix your #egg=xlsx2csv fragments.\u001b[0m\n",
+ "\u001b[33mWARNING: Discarding https://files.pythonhosted.org/packages/c9/d1/70613896bd07a49cfb19312838b8ffb29232e6be5c8e381b953473468e5b/xlsx2csv-0.8.2.tar.gz#sha256=cdd272c82f8b32f1cee76aeaef87b2ee3549661fddf90f7ecf2310967a16fc84 (from https://pypi.org/simple/xlsx2csv/). Requested unknown from https://files.pythonhosted.org/packages/c9/d1/70613896bd07a49cfb19312838b8ffb29232e6be5c8e381b953473468e5b/xlsx2csv-0.8.2.tar.gz#sha256=cdd272c82f8b32f1cee76aeaef87b2ee3549661fddf90f7ecf2310967a16fc84 (from polars[all]) has inconsistent name: filename has 'xlsx2csv', but metadata has 'UNKNOWN'\u001b[0m\n",
+ " Downloading xlsx2csv-0.8.1-py3-none-any.whl (13 kB)\n",
+ "Collecting deltalake>=0.14.0\n",
+ " Downloading deltalake-0.15.1-cp38-abi3-macosx_11_0_arm64.whl (19.1 MB)\n",
+ "\u001b[K |████████████████████████████████| 19.1 MB 14.5 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: fsspec in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from polars[all]) (2023.12.2)\n",
+ "Collecting pyarrow>=7.0.0\n",
+ " Downloading pyarrow-14.0.2-cp39-cp39-macosx_11_0_arm64.whl (24.0 MB)\n",
+ "\u001b[K |████████████████████████████████| 24.0 MB 1.1 MB/s eta 0:00:011\n",
+ "\u001b[?25hCollecting hvplot>=0.9.1\n",
+ " Downloading hvplot-0.9.1-py2.py3-none-any.whl (3.2 MB)\n",
+ "\u001b[K |████████████████████████████████| 3.2 MB 22.8 MB/s eta 0:00:01 |█████▍ | 542 kB 22.8 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting pyarrow-hotfix\n",
+ " Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
+ "Collecting colorcet>=2\n",
+ " Downloading colorcet-3.0.1-py2.py3-none-any.whl (1.7 MB)\n",
+ "\u001b[K |████████████████████████████████| 1.7 MB 34.2 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting param<3.0,>=1.12.0\n",
+ " Downloading param-2.0.1-py3-none-any.whl (113 kB)\n",
+ "\u001b[K |████████████████████████████████| 113 kB 44.1 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: packaging in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from hvplot>=0.9.1->polars[all]) (23.1)\n",
+ "Collecting bokeh>=1.0.0\n",
+ " Downloading bokeh-3.3.3-py3-none-any.whl (6.8 MB)\n",
+ "\u001b[K |████████████████████████████████| 6.8 MB 11.7 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting holoviews>=1.11.0\n",
+ " Downloading holoviews-1.18.1-py2.py3-none-any.whl (4.3 MB)\n",
+ "\u001b[K |████████████████████████████████| 4.3 MB 37.8 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting panel>=0.11.0\n",
+ " Downloading panel-1.3.6-py2.py3-none-any.whl (20.8 MB)\n",
+ "\u001b[K |████████████████████████████████| 20.8 MB 14.8 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: Jinja2>=2.9 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (3.1.2)\n",
+ "Collecting pillow>=7.1.0\n",
+ " Downloading pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl (3.3 MB)\n",
+ "\u001b[K |████████████████████████████████| 3.3 MB 14.5 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: tornado>=5.1 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (6.3.3)\n",
+ "Collecting xyzservices>=2021.09.1\n",
+ " Downloading xyzservices-2023.10.1-py3-none-any.whl (56 kB)\n",
+ "\u001b[K |████████████████████████████████| 56 kB 18.1 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: PyYAML>=3.10 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (6.0.1)\n",
+ "Collecting contourpy>=1\n",
+ " Downloading contourpy-1.2.0-cp39-cp39-macosx_11_0_arm64.whl (242 kB)\n",
+ "\u001b[K |████████████████████████████████| 242 kB 63.1 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting pyct>=0.4.4\n",
+ " Downloading pyct-0.5.0-py2.py3-none-any.whl (15 kB)\n",
+ "Collecting pyviz-comms>=0.7.4\n",
+ " Downloading pyviz_comms-3.0.0-py3-none-any.whl (82 kB)\n",
+ "\u001b[K |████████████████████████████████| 82 kB 3.2 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from Jinja2>=2.9->bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (2.1.3)\n",
+ "Collecting pytz>=2020.1\n",
+ " Using cached pytz-2023.3.post1-py2.py3-none-any.whl (502 kB)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from pandas->polars[all]) (2.8.2)\n",
+ "Collecting tzdata>=2022.1\n",
+ " Downloading tzdata-2023.4-py2.py3-none-any.whl (346 kB)\n",
+ "\u001b[K |████████████████████████████████| 346 kB 20.1 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting tqdm>=4.48.0\n",
+ " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n",
+ "Collecting markdown-it-py\n",
+ " Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n",
+ "Requirement already satisfied: bleach in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from panel>=0.11.0->hvplot>=0.9.1->polars[all]) (6.0.0)\n",
+ "Collecting markdown\n",
+ " Downloading Markdown-3.5.1-py3-none-any.whl (102 kB)\n",
+ "\u001b[K |████████████████████████████████| 102 kB 26.5 MB/s ta 0:00:01\n",
+ "\u001b[?25hCollecting linkify-it-py\n",
+ " Downloading linkify_it_py-2.0.2-py3-none-any.whl (19 kB)\n",
+ "Requirement already satisfied: typing-extensions in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from panel>=0.11.0->hvplot>=0.9.1->polars[all]) (4.8.0)\n",
+ "Requirement already satisfied: requests in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from panel>=0.11.0->hvplot>=0.9.1->polars[all]) (2.31.0)\n",
+ "Collecting mdit-py-plugins\n",
+ " Downloading mdit_py_plugins-0.4.0-py3-none-any.whl (54 kB)\n",
+ "\u001b[K |████████████████████████████████| 54 kB 12.4 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting sortedcontainers==2.4.0\n",
+ " Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n",
+ "Collecting rich<14.0.0,>=10.11.0\n",
+ " Downloading rich-13.7.0-py3-none-any.whl (240 kB)\n",
+ "\u001b[K |████████████████████████████████| 240 kB 52.6 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting click<9.0.0,>=7.1.1\n",
+ " Downloading click-8.1.7-py3-none-any.whl (97 kB)\n",
+ "\u001b[K |████████████████████████████████| 97 kB 9.8 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting strictyaml<2.0.0,>=1.7.0\n",
+ " Downloading strictyaml-1.7.3-py3-none-any.whl (123 kB)\n",
+ "\u001b[K |████████████████████████████████| 123 kB 52.8 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting pyparsing<4.0.0,>=3.1.0\n",
+ " Using cached pyparsing-3.1.1-py3-none-any.whl (103 kB)\n",
+ "Collecting mmhash3<4.0.0,>=3.0.0\n",
+ " Downloading mmhash3-3.0.1-cp39-cp39-macosx_11_0_arm64.whl (12 kB)\n",
+ "Collecting pydantic-core==2.14.6\n",
+ " Downloading pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl (1.7 MB)\n",
+ "\u001b[K |████████████████████████████████| 1.7 MB 10.7 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting annotated-types>=0.4.0\n",
+ " Downloading annotated_types-0.6.0-py3-none-any.whl (12 kB)\n",
+ "Requirement already satisfied: six>=1.5 in /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas->polars[all]) (1.15.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (3.2.0)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (2.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (2023.7.22)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (3.4)\n",
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from rich<14.0.0,>=10.11.0->pyiceberg>=0.5.0->polars[all]) (2.16.1)\n",
+ "Collecting mdurl~=0.1\n",
+ " Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n",
+ "Collecting adbc-driver-manager\n",
+ " Downloading adbc_driver_manager-0.9.0-cp39-cp39-macosx_11_0_arm64.whl (261 kB)\n",
+ "\u001b[K |████████████████████████████████| 261 kB 3.5 MB/s eta 0:00:01\n",
+ "\u001b[?25hRequirement already satisfied: webencodings in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bleach->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (0.5.1)\n",
+ "Collecting zope.event\n",
+ " Downloading zope.event-5.0-py3-none-any.whl (6.8 kB)\n",
+ "Collecting zope.interface\n",
+ " Downloading zope.interface-6.1-cp39-cp39-macosx_11_0_arm64.whl (202 kB)\n",
+ "\u001b[K |████████████████████████████████| 202 kB 18.6 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting greenlet>=2.0.0\n",
+ " Downloading greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl (269 kB)\n",
+ "\u001b[K |████████████████████████████████| 269 kB 5.4 MB/s eta 0:00:01\n",
+ "\u001b[?25hCollecting uc-micro-py\n",
+ " Downloading uc_micro_py-1.0.2-py3-none-any.whl (6.2 kB)\n",
+ "Requirement already satisfied: importlib-metadata>=4.4 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from markdown->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (6.8.0)\n",
+ "Requirement already satisfied: zipp>=0.5 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from importlib-metadata>=4.4->markdown->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (3.17.0)\n",
+ "Requirement already satisfied: setuptools in /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from zope.event->gevent->polars[all]) (58.0.4)\n",
+ "Building wheels for collected packages: pyiceberg\n",
+ " Building wheel for pyiceberg (PEP 517) ... \u001b[?25ldone\n",
+ "\u001b[?25h Created wheel for pyiceberg: filename=pyiceberg-0.5.1-cp39-cp39-macosx_14_0_arm64.whl size=527582 sha256=460f2fb33d28df351dd1f65841874e740e9e5f1d4953f861d877ec4b0e94911c\n",
+ " Stored in directory: /Users/cmc/Library/Caches/pip/wheels/fb/8c/3f/f25c02bddafeae467122e8757c9275c5981cf708f1d182f494\n",
+ "Successfully built pyiceberg\n",
+ "Installing collected packages: tzdata, pytz, mdurl, xyzservices, uc-micro-py, pillow, param, pandas, markdown-it-py, contourpy, tqdm, pyviz-comms, pyct, mdit-py-plugins, markdown, linkify-it-py, bokeh, pydantic-core, panel, colorcet, annotated-types, zope.interface, zope.event, strictyaml, sortedcontainers, rich, pyparsing, pydantic, pyarrow-hotfix, pyarrow, mmhash3, holoviews, greenlet, click, adbc-driver-manager, xlsxwriter, xlsx2csv, sqlalchemy, pyiceberg, hvplot, gevent, deltalake, connectorx, cloudpickle, adbc-driver-sqlite\n",
+ "Successfully installed adbc-driver-manager-0.9.0 adbc-driver-sqlite-0.9.0 annotated-types-0.6.0 bokeh-3.3.3 click-8.1.7 cloudpickle-3.0.0 colorcet-3.0.1 connectorx-0.3.2 contourpy-1.2.0 deltalake-0.15.1 gevent-23.9.1 greenlet-3.0.3 holoviews-1.18.1 hvplot-0.9.1 linkify-it-py-2.0.2 markdown-3.5.1 markdown-it-py-3.0.0 mdit-py-plugins-0.4.0 mdurl-0.1.2 mmhash3-3.0.1 pandas-2.1.4 panel-1.3.6 param-2.0.1 pillow-10.2.0 pyarrow-14.0.2 pyarrow-hotfix-0.6 pyct-0.5.0 pydantic-2.5.3 pydantic-core-2.14.6 pyiceberg-0.5.1 pyparsing-3.1.1 pytz-2023.3.post1 pyviz-comms-3.0.0 rich-13.7.0 sortedcontainers-2.4.0 sqlalchemy-2.0.25 strictyaml-1.7.3 tqdm-4.66.1 tzdata-2023.4 uc-micro-py-1.0.2 xlsx2csv-0.8.1 xlsxwriter-3.1.9 xyzservices-2023.10.1 zope.event-5.0 zope.interface-6.1\n",
+ "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 23.3.2 is available.\n",
+ "You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "pip install 'polars[all]'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import polars as pl\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "shape: (3, 3)\n",
+ "┌─────────┬─────────────────────┬───────┐\n",
+ "│ integer ┆ date ┆ float │\n",
+ "│ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ datetime[μs] ┆ f64 │\n",
+ "╞═════════╪═════════════════════╪═══════╡\n",
+ "│ 1 ┆ 2022-01-01 00:00:00 ┆ 4.0 │\n",
+ "│ 2 ┆ 2022-01-02 00:00:00 ┆ 5.0 │\n",
+ "│ 3 ┆ 2022-01-03 00:00:00 ┆ 6.0 │\n",
+ "└─────────┴─────────────────────┴───────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "df = pl.DataFrame(\n",
+ " {\n",
+ " \"integer\": [1, 2, 3],\n",
+ " \"date\": [\n",
+ " datetime(2022, 1, 1),\n",
+ " datetime(2022, 1, 2),\n",
+ " datetime(2022, 1, 3),\n",
+ " ],\n",
+ " \"float\": [4.0, 5.0, 6.0],\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "print(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "shape: (16_598, 11)\n",
+ "┌───────┬─────────────────┬──────────┬──────┬───┬──────────┬──────────┬─────────────┬──────────────┐\n",
+ "│ Rank ┆ Name ┆ Platform ┆ Year ┆ … ┆ EU_Sales ┆ JP_Sales ┆ Other_Sales ┆ Global_Sales │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ str ┆ str ┆ i64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
+ "╞═══════╪═════════════════╪══════════╪══════╪═══╪══════════╪══════════╪═════════════╪══════════════╡\n",
+ "│ 1 ┆ Wii Sports ┆ Wii ┆ 2006 ┆ … ┆ 29.02 ┆ 3.77 ┆ 8.46 ┆ 82.74 │\n",
+ "│ 2 ┆ Super Mario ┆ NES ┆ 1985 ┆ … ┆ 3.58 ┆ 6.81 ┆ 0.77 ┆ 40.24 │\n",
+ "│ ┆ Bros. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 3 ┆ Mario Kart Wii ┆ Wii ┆ 2008 ┆ … ┆ 12.88 ┆ 3.79 ┆ 3.31 ┆ 35.82 │\n",
+ "│ 4 ┆ Wii Sports ┆ Wii ┆ 2009 ┆ … ┆ 11.01 ┆ 3.28 ┆ 2.96 ┆ 33.0 │\n",
+ "│ ┆ Resort ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 16597 ┆ Men in Black ┆ GC ┆ 2003 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n",
+ "│ ┆ II: Alien ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ┆ Escape ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 16598 ┆ SCORE ┆ PS2 ┆ 2008 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n",
+ "│ ┆ International ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ ┆ Baja 1000: T… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "│ 16599 ┆ Know How 2 ┆ DS ┆ 2010 ┆ … ┆ 0.01 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n",
+ "│ 16600 ┆ Spirits & ┆ GBA ┆ 2003 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n",
+ "│ ┆ Spells ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n",
+ "└───────┴─────────────────┴──────────┴──────┴───┴──────────┴──────────┴─────────────┴──────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "df = pl.read_csv(\"../data_files/video_game_sales.csv\", ignore_errors=True)\n",
+ "print(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div><style>\n",
+ ".dataframe > thead > tr,\n",
+ ".dataframe > tbody > tr {\n",
+ " text-align: right;\n",
+ " white-space: pre-wrap;\n",
+ "}\n",
+ "</style>\n",
+ "<small>shape: (40, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Year</th><th>count</th></tr><tr><td>i64</td><td>u32</td></tr></thead><tbody><tr><td>1982</td><td>36</td></tr><tr><td>1999</td><td>338</td></tr><tr><td>2014</td><td>582</td></tr><tr><td>1993</td><td>60</td></tr><tr><td>2016</td><td>344</td></tr><tr><td>2008</td><td>1428</td></tr><tr><td>2009</td><td>1431</td></tr><tr><td>1988</td><td>15</td></tr><tr><td>2003</td><td>775</td></tr><tr><td>1994</td><td>121</td></tr><tr><td>2001</td><td>482</td></tr><tr><td>2020</td><td>1</td></tr><tr><td>&hellip;</td><td>&hellip;</td></tr><tr><td>1997</td><td>289</td></tr><tr><td>1991</td><td>41</td></tr><tr><td>2006</td><td>1008</td></tr><tr><td>1989</td><td>17</td></tr><tr><td>2017</td><td>3</td></tr><tr><td>2004</td><td>763</td></tr><tr><td>2005</td><td>941</td></tr><tr><td>1983</td><td>17</td></tr><tr><td>2015</td><td>614</td></tr><tr><td>2010</td><td>1259</td></tr><tr><td>1984</td><td>14</td></tr><tr><td>1995</td><td>219</td></tr></tbody></table></div>"
+ ],
+ "text/plain": [
+ "shape: (40, 2)\n",
+ "┌──────┬───────┐\n",
+ "│ Year ┆ count │\n",
+ "│ --- ┆ --- │\n",
+ "│ i64 ┆ u32 │\n",
+ "╞══════╪═══════╡\n",
+ "│ 1982 ┆ 36 │\n",
+ "│ 1999 ┆ 338 │\n",
+ "│ 2014 ┆ 582 │\n",
+ "│ 1993 ┆ 60 │\n",
+ "│ … ┆ … │\n",
+ "│ 2015 ┆ 614 │\n",
+ "│ 2010 ┆ 1259 │\n",
+ "│ 1984 ┆ 14 │\n",
+ "│ 1995 ┆ 219 │\n",
+ "└──────┴───────┘"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.group_by(\"Year\").count()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div><style>\n",
+ ".dataframe > thead > tr,\n",
+ ".dataframe > tbody > tr {\n",
+ " text-align: right;\n",
+ " white-space: pre-wrap;\n",
+ "}\n",
+ "</style>\n",
+ "<small>shape: (2, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Year</th><th>Genre</th></tr><tr><td>i64</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td></tr><tr><td>null</td><td>null</td></tr></tbody></table></div>"
+ ],
+ "text/plain": [
+ "shape: (2, 2)\n",
+ "┌──────┬───────┐\n",
+ "│ Year ┆ Genre │\n",
+ "│ --- ┆ --- │\n",
+ "│ i64 ┆ str │\n",
+ "╞══════╪═══════╡\n",
+ "│ null ┆ null │\n",
+ "│ null ┆ null │\n",
+ "└──────┴───────┘"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.select(pl.col(\"Year\").sort().head(2), pl.col(\"Genre\").filter(pl.col(\"Year\") == 1).sum())"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}