Simplify tokenization pipeline, make it work with large numbers of shards again, (re)add configuration metadata to cache #4801

	# CI workflow: installs the package with its test extras and a pinned
	# JAX/jaxlib CPU build, then runs the pytest suite.
	name: Run tests

	on: [push, pull_request]

	jobs:
	  build:
	    # Run on every push, and on pull requests only when the head branch
	    # lives in a different repository (a fork). A pull request from a branch
	    # of this repository already fires the push event above, so this guard
	    # keeps the same commit from being tested twice.
	    if: >-
	      github.event_name == 'push' ||
	      (github.event_name == 'pull_request' &&
	      github.event.pull_request.head.repo.full_name != github.repository)

	    runs-on: ubuntu-latest
	    strategy:
	      matrix:
	        # Versions are quoted so YAML keeps them as strings
	        # (an unquoted 3.10 would be read as the float 3.1).
	        python-version: ["3.10"]
	        jax-version: ["0.4.26"]

	    steps:
	      - uses: actions/checkout@v3
	      - name: Set up Python ${{ matrix.python-version }}
	        uses: actions/setup-python@v4
	        with:
	          python-version: ${{ matrix.python-version }}
	      - name: Install dependencies
	        # jax and jaxlib are pinned to the same matrix version.
	        # ".[test]" is quoted so the shell cannot glob-expand the brackets.
	        run: |
	          python -m pip install --upgrade pip
	          pip install ".[test]" "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
	          pip install -r ./tests/requirements.txt
	      - name: Test with pytest
	        # XLA_FLAGS asks XLA to expose 8 host-platform (CPU) devices.
	        # The -m expression deselects tests marked entry, slow, or ray.
	        run: |
	          XLA_FLAGS=--xla_force_host_platform_device_count=8 PYTHONPATH=tests:src:. pytest tests -m "not entry and not slow and not ray"

Provide feedback