Skip to content

Commit

Permalink
Merge branch 'main' into remove-unused-ignores-1
Browse files Browse the repository at this point in the history
  • Loading branch information
dangotbanned authored Feb 11, 2025
2 parents 4ea7e9c + 2d6df36 commit 0b35082
Show file tree
Hide file tree
Showing 85 changed files with 2,245 additions and 1,580 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_tpch_queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: local-install
run: uv pip install -U --pre -e ".[dev, core, dask]" --system
run: uv pip install -U --pre -e ".[tests, core, dask]" --system
- name: generate-data
run: cd tpch && python generate_data.py
- name: tpch-tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/downstream_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ jobs:
run: |
cd tea-tasting
pdm remove narwhals
pdm add ./..[dev]
pdm add ./..[tests]
- name: show-deps
run: |
cd tea-tasting
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
run: uv pip install pipdeptree tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
- name: install-reqs
run: |
uv pip install -e ".[dev]" --system
uv pip install -e ".[tests]" --system
- name: show-deps
run: uv pip freeze
- name: Assert dependencies
Expand Down Expand Up @@ -64,7 +64,7 @@ jobs:
- name: install-pretty-old-versions
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
- name: install-reqs
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: show-deps
run: uv pip freeze
- name: show-deptree
Expand Down Expand Up @@ -103,7 +103,7 @@ jobs:
- name: install-not-so-old-versions
run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.0 dask[dataframe]==2024.10 tzdata --system
- name: install-reqs
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: show-deps
run: uv pip freeze
- name: Assert not so old versions dependencies
Expand Down Expand Up @@ -140,7 +140,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: install-kaggle
run: uv pip install kaggle --system
- name: Download Kaggle notebook artifact
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
# Python3.8 is technically at end-of-life, so we don't test everything
run: uv pip install -e ".[dev, core]" --system
run: uv pip install -e ".[tests, core]" --system
- name: show-deps
run: uv pip freeze
- name: Run pytest
Expand All @@ -49,7 +49,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev, core, extra, dask, modin]" --system
run: uv pip install -e ".[tests, core, extra, dask, modin]" --system
- name: install pyspark
run: uv pip install -e ".[pyspark]" --system
# PySpark is not yet available on Python3.12+
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev, core, extra, modin, dask]" --system
run: uv pip install -e ".[tests, core, extra, modin, dask]" --system
- name: install pyspark
run: uv pip install -e ".[pyspark]" --system
# PySpark is not yet available on Python3.12+
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/random_ci_pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: install-random-verions
run: uv pip install -r random-requirements.txt --system
- name: install-narwhals
run: uv pip install -e ".[dev]" --system
run: uv pip install -e ".[tests]" --system
- name: show versions
run: uv pip freeze
- name: Run pytest
Expand Down
40 changes: 40 additions & 0 deletions .github/workflows/typing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Type-checking workflow: runs mypy (via `make typing`) on every pull request
# and on pushes to `main`.
name: Type checking

on:
  pull_request:
  push:
    branches: [main]

jobs:
  mypy:
    strategy:
      matrix:
        # Quoted so the version is read as a string, not a float.
        python-version: ["3.11"]
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: "true"
          cache-suffix: ${{ matrix.python-version }}
          cache-dependency-glob: "pyproject.toml"
      # A real virtualenv at ./.venv is created (instead of `--system`) because
      # the Makefile's `typing` target runs mypy from ./.venv.
      - name: Create venv
        run: uv venv .venv
      - name: install-reqs
        # TODO: add more dependencies/backends incrementally
        run: |
          source .venv/bin/activate
          uv pip install -e ".[tests, typing, core]"
      - name: show-deps
        run: |
          source .venv/bin/activate
          uv pip freeze
      - name: Run mypy
        # `make typing` invokes the repository Makefile's `typing` target.
        run: |
          source .venv/bin/activate
          make typing
14 changes: 8 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ci:
autoupdate_schedule: monthly
skip: [mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
Expand All @@ -13,12 +14,6 @@ repos:
- id: ruff
alias: check-docstrings
entry: python utils/check_docstrings.py
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.14.1'
hooks:
- id: mypy
additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2']
files: ^(narwhals|tests)/
- repo: https://github.com/codespell-project/codespell
rev: 'v2.4.1'
hooks:
Expand Down Expand Up @@ -84,6 +79,13 @@ repos:
entry: pull_request_target
language: pygrep
files: ^\.github/workflows/
- id: mypy
name: mypy
entry: make typing
files: ^(narwhals|tests)/
language: system
types: [python]
require_serial: true
- repo: https://github.com/adamchainz/blacken-docs
rev: "1.19.1" # replace with latest tag on GitHub
hooks:
Expand Down
23 changes: 23 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Mostly based on polars Makefile
# https://github.com/pola-rs/polars/blob/main/py-polars/Makefile
#
# NOTE: recipe lines (the commands under a target) MUST start with a literal
# tab character; make rejects space-indented recipes with "missing separator".

# Running plain `make` prints the help screen.
.DEFAULT_GOAL := help

# `echo -e` in the help recipe relies on bash.
SHELL=bash
VENV=./.venv

# Directory holding the virtualenv's executables: `Scripts` on Windows,
# `bin` everywhere else.
ifeq ($(OS),Windows_NT)
VENV_BIN=$(VENV)/Scripts
else
VENV_BIN=$(VENV)/bin
endif


# Targets are self-documenting: text after `## ` on a target line is what the
# `help` recipe extracts (grep) and formats (awk) into the command list.
.PHONY: help
help:  ## Display this help screen
	@echo -e "\033[1mAvailable commands:\033[0m"
	@grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-22s\033[0m %s\n", $$1, $$2}' | sort

# Runs the mypy executable installed in the virtualenv, with no arguments.
# NOTE(review): presumably mypy picks up its targets/options from project
# configuration - confirm.
.PHONY: typing
typing:  ## Run typing checks
	$(VENV_BIN)/mypy
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- filter
- gather_every
- head
- hist
- implementation
- is_between
- is_duplicated
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ To verify the installation, start the Python REPL and execute:
```python
>>> import narwhals
>>> narwhals.__version__
'1.25.2'
'1.26.0'
```

If you see the version number, then the installation was successful!
Expand Down
2 changes: 1 addition & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
from narwhals.utils import maybe_reset_index
from narwhals.utils import maybe_set_index

__version__ = "1.25.2"
__version__ = "1.26.0"

__all__ = [
"Array",
Expand Down
44 changes: 17 additions & 27 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from itertools import chain
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterator
Expand Down Expand Up @@ -44,10 +43,10 @@
from typing_extensions import Self
from typing_extensions import TypeAlias

from narwhals._arrow.expr import ArrowExpr
from narwhals._arrow.group_by import ArrowGroupBy
from narwhals._arrow.namespace import ArrowNamespace
from narwhals._arrow.series import ArrowSeries
from narwhals._arrow.typing import IntoArrowExpr
from narwhals.dtypes import DType
from narwhals.typing import SizeUnit
from narwhals.typing import _1DArray
Expand Down Expand Up @@ -342,8 +341,11 @@ def simple_select(self, *column_names: str) -> Self:
self._native_frame.select(list(column_names)), validate_column_names=False
)

def select(self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr) -> Self:
new_series: list[ArrowSeries] = evaluate_into_exprs(self, *exprs, **named_exprs)
def aggregate(self: Self, *exprs: ArrowExpr) -> Self:
return self.select(*exprs)

def select(self: Self, *exprs: ArrowExpr) -> Self:
new_series: list[ArrowSeries] = evaluate_into_exprs(self, *exprs)
if not new_series:
# return empty dataframe, like Polars does
return self._from_native_frame(
Expand All @@ -353,11 +355,9 @@ def select(self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr) -> S
df = pa.Table.from_arrays(broadcast_series(new_series), names=names)
return self._from_native_frame(df, validate_column_names=False)

def with_columns(
self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr
) -> Self:
def with_columns(self: Self, *exprs: ArrowExpr) -> Self:
native_frame = self._native_frame
new_columns: list[ArrowSeries] = evaluate_into_exprs(self, *exprs, **named_exprs)
new_columns: list[ArrowSeries] = evaluate_into_exprs(self, *exprs)

length = len(self)
columns = self.columns
Expand Down Expand Up @@ -407,9 +407,9 @@ def join(
)

return self._from_native_frame(
self.with_columns(**{key_token: plx.lit(0, None)})
self.with_columns(plx.lit(0, None).alias(key_token))
._native_frame.join(
other.with_columns(**{key_token: plx.lit(0, None)})._native_frame,
other.with_columns(plx.lit(0, None).alias(key_token))._native_frame,
keys=key_token,
right_keys=key_token,
join_type="inner",
Expand Down Expand Up @@ -532,23 +532,12 @@ def with_row_index(self: Self, name: str) -> Self:
df.append_column(name, row_indices).select([name, *cols])
)

def filter(self: Self, *predicates: IntoArrowExpr, **constraints: Any) -> Self:
if (
len(predicates) == 1
and isinstance(predicates[0], list)
and all(isinstance(x, bool) for x in predicates[0])
and not constraints
):
mask_native = predicates[0]
def filter(self: Self, predicate: ArrowExpr | list[bool]) -> Self:
if isinstance(predicate, list):
mask_native = predicate
else:
plx = self.__narwhals_namespace__()
expr = plx.all_horizontal(
*chain(
predicates, (plx.col(name) == v for name, v in constraints.items())
)
)
# `[0]` is safe as all_horizontal's expression only returns a single column
mask = expr._call(self)[0]
# `[0]` is safe as the predicate's expression only returns a single column
mask = evaluate_into_exprs(self, predicate)[0]
mask_native = broadcast_and_extract_dataframe_comparand(
length=len(self), other=mask, backend_version=self._backend_version
)
Expand Down Expand Up @@ -769,7 +758,8 @@ def unique(
)

keep_idx = self.simple_select(*subset).is_unique()
return self.filter(keep_idx)
plx = self.__narwhals_namespace__()
return self.filter(plx._create_expr_from_series(keep_idx))

def gather_every(self: Self, n: int, offset: int) -> Self:
return self._from_native_frame(
Expand Down
3 changes: 1 addition & 2 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.namespace import ArrowNamespace
from narwhals._arrow.typing import IntoArrowExpr
from narwhals.dtypes import DType
from narwhals.utils import Version

Expand Down Expand Up @@ -189,7 +188,7 @@ def __invert__(self: Self) -> Self:
def len(self: Self) -> Self:
return reuse_series_implementation(self, "len", returns_scalar=True)

def filter(self: Self, *predicates: IntoArrowExpr) -> Self:
def filter(self: Self, *predicates: ArrowExpr) -> Self:
plx = self.__narwhals_namespace__()
other = plx.all_horizontal(*predicates)
return reuse_series_implementation(self, "filter", other=other)
Expand Down
Loading

0 comments on commit 0b35082

Please sign in to comment.