diff options
author | Christian Cleberg <hello@cleberg.net> | 2024-01-09 21:03:20 -0600 |
---|---|---|
committer | Christian Cleberg <hello@cleberg.net> | 2024-01-09 21:03:20 -0600 |
commit | ce4abbd687e6b5500589d81ef506406142e6c6cd (patch) | |
tree | e19d37a213c69f9d1df7d2f227d69a29e1f71ed1 | |
parent | b5590008a2a8a1dba0defca9f6f3bb0377c6365b (diff) | |
download | data-science-ce4abbd687e6b5500589d81ef506406142e6c6cd.tar.gz data-science-ce4abbd687e6b5500589d81ef506406142e6c6cd.tar.bz2 data-science-ce4abbd687e6b5500589d81ef506406142e6c6cd.zip |
feat: add polars notebook
-rw-r--r-- | notebooks/polars.ipynb | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/notebooks/polars.ipynb b/notebooks/polars.ipynb new file mode 100644 index 0000000..1470c0a --- /dev/null +++ b/notebooks/polars.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Polars\n", + "\n", + "Testing out [Polars](https://pola.rs/)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: polars[all] in /Users/cmc/Library/Python/3.9/lib/python/site-packages (0.20.3)\n", + "Collecting connectorx>=0.3.2\n", + " Downloading connectorx-0.3.2-cp39-cp39-macosx_11_0_arm64.whl (43.7 MB)\n", + "\u001b[K |████████████████████████████████| 43.7 MB 709 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting pydantic\n", + " Downloading pydantic-2.5.3-py3-none-any.whl (381 kB)\n", + "\u001b[K |████████████████████████████████| 381 kB 31.7 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting pandas\n", + " Downloading pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl (11.0 MB)\n", + "\u001b[K |████████████████████████████████| 11.0 MB 46.7 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting sqlalchemy\n", + " Downloading SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl (2.1 MB)\n", + "\u001b[K |████████████████████████████████| 2.1 MB 8.0 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyiceberg>=0.5.0\n", + " Downloading pyiceberg-0.5.1.tar.gz (418 kB)\n", + "\u001b[K |████████████████████████████████| 418 kB 14.7 MB/s eta 0:00:01\n", + "\u001b[?25h\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/normal/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/normal/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Additional context:\n", + " user = False\n", + " home = None\n", + " root = None\n", + " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/normal'\u001b[0m\n", + "\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/overlay/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/overlay/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Additional context:\n", + " user = False\n", + " home = None\n", + " root = None\n", + " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-nk7hp5uy/overlay'\u001b[0m\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing wheel metadata ... \u001b[?25ldone\n", + "\u001b[?25hCollecting gevent\n", + " Downloading gevent-23.9.1-cp39-cp39-macosx_11_0_universal2.whl (2.9 MB)\n", + "\u001b[K |████████████████████████████████| 2.9 MB 27.4 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.16.0 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from polars[all]) (1.26.3)\n", + "Collecting cloudpickle\n", + " Downloading cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n", + "Collecting adbc_driver_sqlite\n", + " Downloading adbc_driver_sqlite-0.9.0-py3-none-macosx_11_0_arm64.whl (696 kB)\n", + "\u001b[K |████████████████████████████████| 696 kB 27.0 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting xlsxwriter\n", + " Downloading XlsxWriter-3.1.9-py3-none-any.whl (154 kB)\n", + "\u001b[K |████████████████████████████████| 154 kB 11.5 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting xlsx2csv>=0.8.0\n", + " Downloading xlsx2csv-0.8.2.tar.gz (227 kB)\n", + "\u001b[K |████████████████████████████████| 227 kB 42.7 MB/s eta 0:00:01\n", + "\u001b[?25h\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/normal/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/normal/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Additional context:\n", + " user = False\n", + " home = None\n", + " root = None\n", + " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/normal'\u001b[0m\n", + "\u001b[33m WARNING: Value for prefixed-purelib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/overlay/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Value for prefixed-platlib does not match. Please report this to <https://github.com/pypa/pip/issues/10151>\n", + " distutils: /private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/overlay/lib/python3.9/site-packages\n", + " sysconfig: /Library/Python/3.9/site-packages\u001b[0m\n", + "\u001b[33m WARNING: Additional context:\n", + " user = False\n", + " home = None\n", + " root = None\n", + " prefix = '/private/var/folders/7q/fr3mjnb5279fv8k1tz643s140000gn/T/pip-build-env-05_qjymb/overlay'\u001b[0m\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing wheel metadata ... \u001b[?25ldone\n", + "\u001b[?25h\u001b[33m WARNING: Generating metadata for package xlsx2csv produced metadata for project name unknown. Fix your #egg=xlsx2csv fragments.\u001b[0m\n", + "\u001b[33mWARNING: Discarding https://files.pythonhosted.org/packages/c9/d1/70613896bd07a49cfb19312838b8ffb29232e6be5c8e381b953473468e5b/xlsx2csv-0.8.2.tar.gz#sha256=cdd272c82f8b32f1cee76aeaef87b2ee3549661fddf90f7ecf2310967a16fc84 (from https://pypi.org/simple/xlsx2csv/). Requested unknown from https://files.pythonhosted.org/packages/c9/d1/70613896bd07a49cfb19312838b8ffb29232e6be5c8e381b953473468e5b/xlsx2csv-0.8.2.tar.gz#sha256=cdd272c82f8b32f1cee76aeaef87b2ee3549661fddf90f7ecf2310967a16fc84 (from polars[all]) has inconsistent name: filename has 'xlsx2csv', but metadata has 'UNKNOWN'\u001b[0m\n", + " Downloading xlsx2csv-0.8.1-py3-none-any.whl (13 kB)\n", + "Collecting deltalake>=0.14.0\n", + " Downloading deltalake-0.15.1-cp38-abi3-macosx_11_0_arm64.whl (19.1 MB)\n", + "\u001b[K |████████████████████████████████| 19.1 MB 14.5 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: fsspec in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from polars[all]) (2023.12.2)\n", + "Collecting pyarrow>=7.0.0\n", + " Downloading pyarrow-14.0.2-cp39-cp39-macosx_11_0_arm64.whl (24.0 MB)\n", + "\u001b[K |████████████████████████████████| 24.0 MB 1.1 MB/s eta 0:00:011\n", + "\u001b[?25hCollecting hvplot>=0.9.1\n", + " Downloading hvplot-0.9.1-py2.py3-none-any.whl (3.2 MB)\n", + "\u001b[K |████████████████████████████████| 3.2 MB 22.8 MB/s eta 0:00:01 |█████▍ | 542 kB 22.8 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyarrow-hotfix\n", + " Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", + "Collecting colorcet>=2\n", + " Downloading colorcet-3.0.1-py2.py3-none-any.whl (1.7 MB)\n", + "\u001b[K |████████████████████████████████| 1.7 MB 34.2 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting param<3.0,>=1.12.0\n", + " Downloading param-2.0.1-py3-none-any.whl (113 kB)\n", + "\u001b[K |████████████████████████████████| 113 kB 44.1 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: packaging in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from hvplot>=0.9.1->polars[all]) (23.1)\n", + "Collecting bokeh>=1.0.0\n", + " Downloading bokeh-3.3.3-py3-none-any.whl (6.8 MB)\n", + "\u001b[K |████████████████████████████████| 6.8 MB 11.7 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting holoviews>=1.11.0\n", + " Downloading holoviews-1.18.1-py2.py3-none-any.whl (4.3 MB)\n", + "\u001b[K |████████████████████████████████| 4.3 MB 37.8 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting panel>=0.11.0\n", + " Downloading panel-1.3.6-py2.py3-none-any.whl (20.8 MB)\n", + "\u001b[K |████████████████████████████████| 20.8 MB 14.8 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: Jinja2>=2.9 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (3.1.2)\n", + "Collecting pillow>=7.1.0\n", + " Downloading pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl (3.3 MB)\n", + "\u001b[K |████████████████████████████████| 3.3 MB 14.5 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: tornado>=5.1 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (6.3.3)\n", + "Collecting xyzservices>=2021.09.1\n", + " Downloading xyzservices-2023.10.1-py3-none-any.whl (56 kB)\n", + "\u001b[K |████████████████████████████████| 56 kB 18.1 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=3.10 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (6.0.1)\n", + "Collecting contourpy>=1\n", + " Downloading contourpy-1.2.0-cp39-cp39-macosx_11_0_arm64.whl (242 kB)\n", + "\u001b[K |████████████████████████████████| 242 kB 63.1 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyct>=0.4.4\n", + " Downloading pyct-0.5.0-py2.py3-none-any.whl (15 kB)\n", + "Collecting pyviz-comms>=0.7.4\n", + " Downloading pyviz_comms-3.0.0-py3-none-any.whl (82 kB)\n", + "\u001b[K |████████████████████████████████| 82 kB 3.2 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from Jinja2>=2.9->bokeh>=1.0.0->hvplot>=0.9.1->polars[all]) (2.1.3)\n", + "Collecting pytz>=2020.1\n", + " Using cached pytz-2023.3.post1-py2.py3-none-any.whl (502 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from pandas->polars[all]) (2.8.2)\n", + "Collecting tzdata>=2022.1\n", + " Downloading tzdata-2023.4-py2.py3-none-any.whl (346 kB)\n", + "\u001b[K |████████████████████████████████| 346 kB 20.1 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting tqdm>=4.48.0\n", + " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Collecting markdown-it-py\n", + " Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", + "Requirement already satisfied: bleach in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from panel>=0.11.0->hvplot>=0.9.1->polars[all]) (6.0.0)\n", + "Collecting markdown\n", + " Downloading Markdown-3.5.1-py3-none-any.whl (102 kB)\n", + "\u001b[K |████████████████████████████████| 102 kB 26.5 MB/s ta 0:00:01\n", + "\u001b[?25hCollecting linkify-it-py\n", + " Downloading linkify_it_py-2.0.2-py3-none-any.whl (19 kB)\n", + "Requirement already satisfied: typing-extensions in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from panel>=0.11.0->hvplot>=0.9.1->polars[all]) (4.8.0)\n", + "Requirement already satisfied: requests in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from panel>=0.11.0->hvplot>=0.9.1->polars[all]) (2.31.0)\n", + "Collecting mdit-py-plugins\n", + " Downloading mdit_py_plugins-0.4.0-py3-none-any.whl (54 kB)\n", + "\u001b[K |████████████████████████████████| 54 kB 12.4 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting sortedcontainers==2.4.0\n", + " Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n", + "Collecting rich<14.0.0,>=10.11.0\n", + " Downloading rich-13.7.0-py3-none-any.whl (240 kB)\n", + "\u001b[K |████████████████████████████████| 240 kB 52.6 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting click<9.0.0,>=7.1.1\n", + " Downloading click-8.1.7-py3-none-any.whl (97 kB)\n", + "\u001b[K |████████████████████████████████| 97 kB 9.8 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting strictyaml<2.0.0,>=1.7.0\n", + " Downloading strictyaml-1.7.3-py3-none-any.whl (123 kB)\n", + "\u001b[K |████████████████████████████████| 123 kB 52.8 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyparsing<4.0.0,>=3.1.0\n", + " Using cached pyparsing-3.1.1-py3-none-any.whl (103 kB)\n", + "Collecting mmhash3<4.0.0,>=3.0.0\n", + " Downloading mmhash3-3.0.1-cp39-cp39-macosx_11_0_arm64.whl (12 kB)\n", + "Collecting pydantic-core==2.14.6\n", + " Downloading pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl (1.7 MB)\n", + "\u001b[K |████████████████████████████████| 1.7 MB 10.7 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting annotated-types>=0.4.0\n", + " Downloading annotated_types-0.6.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: six>=1.5 in /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas->polars[all]) (1.15.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (3.2.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from requests->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (3.4)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from rich<14.0.0,>=10.11.0->pyiceberg>=0.5.0->polars[all]) (2.16.1)\n", + "Collecting mdurl~=0.1\n", + " Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Collecting adbc-driver-manager\n", + " Downloading adbc_driver_manager-0.9.0-cp39-cp39-macosx_11_0_arm64.whl (261 kB)\n", + "\u001b[K |████████████████████████████████| 261 kB 3.5 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: webencodings in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from bleach->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (0.5.1)\n", + "Collecting zope.event\n", + " Downloading zope.event-5.0-py3-none-any.whl (6.8 kB)\n", + "Collecting zope.interface\n", + " Downloading zope.interface-6.1-cp39-cp39-macosx_11_0_arm64.whl (202 kB)\n", + "\u001b[K |████████████████████████████████| 202 kB 18.6 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting greenlet>=2.0.0\n", + " Downloading greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl (269 kB)\n", + "\u001b[K |████████████████████████████████| 269 kB 5.4 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting uc-micro-py\n", + " Downloading uc_micro_py-1.0.2-py3-none-any.whl (6.2 kB)\n", + "Requirement already satisfied: importlib-metadata>=4.4 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from markdown->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (6.8.0)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/cmc/Library/Python/3.9/lib/python/site-packages (from importlib-metadata>=4.4->markdown->panel>=0.11.0->hvplot>=0.9.1->polars[all]) (3.17.0)\n", + "Requirement already satisfied: setuptools in /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from zope.event->gevent->polars[all]) (58.0.4)\n", + "Building wheels for collected packages: pyiceberg\n", + " Building wheel for pyiceberg (PEP 517) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for pyiceberg: filename=pyiceberg-0.5.1-cp39-cp39-macosx_14_0_arm64.whl size=527582 sha256=460f2fb33d28df351dd1f65841874e740e9e5f1d4953f861d877ec4b0e94911c\n", + " Stored in directory: /Users/cmc/Library/Caches/pip/wheels/fb/8c/3f/f25c02bddafeae467122e8757c9275c5981cf708f1d182f494\n", + "Successfully built pyiceberg\n", + "Installing collected packages: tzdata, pytz, mdurl, xyzservices, uc-micro-py, pillow, param, pandas, markdown-it-py, contourpy, tqdm, pyviz-comms, pyct, mdit-py-plugins, markdown, linkify-it-py, bokeh, pydantic-core, panel, colorcet, annotated-types, zope.interface, zope.event, strictyaml, sortedcontainers, rich, pyparsing, pydantic, pyarrow-hotfix, pyarrow, mmhash3, holoviews, greenlet, click, adbc-driver-manager, xlsxwriter, xlsx2csv, sqlalchemy, pyiceberg, hvplot, gevent, deltalake, connectorx, cloudpickle, adbc-driver-sqlite\n", + "Successfully installed adbc-driver-manager-0.9.0 adbc-driver-sqlite-0.9.0 annotated-types-0.6.0 bokeh-3.3.3 click-8.1.7 cloudpickle-3.0.0 colorcet-3.0.1 connectorx-0.3.2 contourpy-1.2.0 deltalake-0.15.1 gevent-23.9.1 greenlet-3.0.3 holoviews-1.18.1 hvplot-0.9.1 linkify-it-py-2.0.2 markdown-3.5.1 markdown-it-py-3.0.0 mdit-py-plugins-0.4.0 mdurl-0.1.2 mmhash3-3.0.1 pandas-2.1.4 panel-1.3.6 param-2.0.1 pillow-10.2.0 pyarrow-14.0.2 pyarrow-hotfix-0.6 pyct-0.5.0 pydantic-2.5.3 pydantic-core-2.14.6 pyiceberg-0.5.1 pyparsing-3.1.1 pytz-2023.3.post1 pyviz-comms-3.0.0 rich-13.7.0 sortedcontainers-2.4.0 sqlalchemy-2.0.25 strictyaml-1.7.3 tqdm-4.66.1 tzdata-2023.4 uc-micro-py-1.0.2 xlsx2csv-0.8.1 xlsxwriter-3.1.9 xyzservices-2023.10.1 zope.event-5.0 zope.interface-6.1\n", + "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 23.3.2 is available.\n", + "You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install 'polars[all]'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (3, 3)\n", + "┌─────────┬─────────────────────┬───────┐\n", + "│ integer ┆ date ┆ float │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ datetime[μs] ┆ f64 │\n", + "╞═════════╪═════════════════════╪═══════╡\n", + "│ 1 ┆ 2022-01-01 00:00:00 ┆ 4.0 │\n", + "│ 2 ┆ 2022-01-02 00:00:00 ┆ 5.0 │\n", + "│ 3 ┆ 2022-01-03 00:00:00 ┆ 6.0 │\n", + "└─────────┴─────────────────────┴───────┘\n" + ] + } + ], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"integer\": [1, 2, 3],\n", + " \"date\": [\n", + " datetime(2022, 1, 1),\n", + " datetime(2022, 1, 2),\n", + " datetime(2022, 1, 3),\n", + " ],\n", + " \"float\": [4.0, 5.0, 6.0],\n", + " }\n", + ")\n", + "\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (16_598, 11)\n", + "┌───────┬─────────────────┬──────────┬──────┬───┬──────────┬──────────┬─────────────┬──────────────┐\n", + "│ Rank ┆ Name ┆ Platform ┆ Year ┆ … ┆ EU_Sales ┆ JP_Sales ┆ Other_Sales ┆ Global_Sales │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ i64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═══════╪═════════════════╪══════════╪══════╪═══╪══════════╪══════════╪═════════════╪══════════════╡\n", + "│ 1 ┆ Wii Sports ┆ Wii ┆ 2006 ┆ … ┆ 29.02 ┆ 3.77 ┆ 8.46 ┆ 82.74 │\n", + "│ 2 ┆ Super Mario ┆ NES ┆ 1985 ┆ … ┆ 3.58 ┆ 6.81 ┆ 0.77 ┆ 40.24 │\n", + "│ ┆ Bros. ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 3 ┆ Mario Kart Wii ┆ Wii ┆ 2008 ┆ … ┆ 12.88 ┆ 3.79 ┆ 3.31 ┆ 35.82 │\n", + "│ 4 ┆ Wii Sports ┆ Wii ┆ 2009 ┆ … ┆ 11.01 ┆ 3.28 ┆ 2.96 ┆ 33.0 │\n", + "│ ┆ Resort ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 16597 ┆ Men in Black ┆ GC ┆ 2003 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n", + "│ ┆ II: Alien ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ Escape ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 16598 ┆ SCORE ┆ PS2 ┆ 2008 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n", + "│ ┆ International ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ Baja 1000: T… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 16599 ┆ Know How 2 ┆ DS ┆ 2010 ┆ … ┆ 0.01 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n", + "│ 16600 ┆ Spirits & ┆ GBA ┆ 2003 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.01 │\n", + "│ ┆ Spells ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└───────┴─────────────────┴──────────┴──────┴───┴──────────┴──────────┴─────────────┴──────────────┘\n" + ] + } + ], + "source": [ + "df = pl.read_csv(\"../data_files/video_game_sales.csv\", ignore_errors=True)\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div><style>\n", + ".dataframe > thead > tr,\n", + ".dataframe > tbody > tr {\n", + " text-align: right;\n", + " white-space: pre-wrap;\n", + "}\n", + "</style>\n", + "<small>shape: (40, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Year</th><th>count</th></tr><tr><td>i64</td><td>u32</td></tr></thead><tbody><tr><td>1982</td><td>36</td></tr><tr><td>1999</td><td>338</td></tr><tr><td>2014</td><td>582</td></tr><tr><td>1993</td><td>60</td></tr><tr><td>2016</td><td>344</td></tr><tr><td>2008</td><td>1428</td></tr><tr><td>2009</td><td>1431</td></tr><tr><td>1988</td><td>15</td></tr><tr><td>2003</td><td>775</td></tr><tr><td>1994</td><td>121</td></tr><tr><td>2001</td><td>482</td></tr><tr><td>2020</td><td>1</td></tr><tr><td>…</td><td>…</td></tr><tr><td>1997</td><td>289</td></tr><tr><td>1991</td><td>41</td></tr><tr><td>2006</td><td>1008</td></tr><tr><td>1989</td><td>17</td></tr><tr><td>2017</td><td>3</td></tr><tr><td>2004</td><td>763</td></tr><tr><td>2005</td><td>941</td></tr><tr><td>1983</td><td>17</td></tr><tr><td>2015</td><td>614</td></tr><tr><td>2010</td><td>1259</td></tr><tr><td>1984</td><td>14</td></tr><tr><td>1995</td><td>219</td></tr></tbody></table></div>" + ], + "text/plain": [ + "shape: (40, 2)\n", + "┌──────┬───────┐\n", + "│ Year ┆ count │\n", + "│ --- ┆ --- │\n", + "│ i64 ┆ u32 │\n", + "╞══════╪═══════╡\n", + "│ 1982 ┆ 36 │\n", + "│ 1999 ┆ 338 │\n", + "│ 2014 ┆ 582 │\n", + "│ 1993 ┆ 60 │\n", + "│ … ┆ … │\n", + "│ 2015 ┆ 614 │\n", + "│ 2010 ┆ 1259 │\n", + "│ 1984 ┆ 14 │\n", + "│ 1995 ┆ 219 │\n", + "└──────┴───────┘" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.group_by(\"Year\").count()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div><style>\n", + ".dataframe > thead > tr,\n", + ".dataframe > tbody > tr {\n", + " text-align: right;\n", + " white-space: pre-wrap;\n", + "}\n", + "</style>\n", + "<small>shape: (2, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Year</th><th>Genre</th></tr><tr><td>i64</td><td>str</td></tr></thead><tbody><tr><td>null</td><td>null</td></tr><tr><td>null</td><td>null</td></tr></tbody></table></div>" + ], + "text/plain": [ + "shape: (2, 2)\n", + "┌──────┬───────┐\n", + "│ Year ┆ Genre │\n", + "│ --- ┆ --- │\n", + "│ i64 ┆ str │\n", + "╞══════╪═══════╡\n", + "│ null ┆ null │\n", + "│ null ┆ null │\n", + "└──────┴───────┘" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(pl.col(\"Year\").sort().head(2), pl.col(\"Genre\").filter(pl.col(\"Year\") == 1).sum())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} |