Simplify tokenization pipeline, make it work with large numbers of shards again, (re)add configuration metadata to cache #3365

name: Run entry tests
on: [push, pull_request]
jobs:
  build:
    # Run on every push; for pull_request events, run only when the PR comes from a fork
    # (branches in this repository already trigger the push run, so this avoids duplicate jobs).
    if: github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository)
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]
        jax-version: ["0.4.26"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pytest
          pip install .[test] "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
          pip install soundfile librosa
      - name: Run entry tests with pytest
        run: |
          # Simulate 8 devices on the CPU backend so sharding-aware entry tests can run on a single host.
          XLA_FLAGS=--xla_force_host_platform_device_count=8 PYTHONPATH=$(pwd)/tests:$(pwd)/src:$(pwd):. pytest tests -m entry
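
The XLA_FLAGS setting in the test step asks JAX's CPU backend to expose eight simulated devices, so multi-device sharding paths can be exercised on a single CI machine. A minimal sketch of verifying this locally (the flag and jax.device_count() are standard JAX/XLA features; the count of 8 simply mirrors the workflow above):

import os

# Must be set before JAX is imported, otherwise the flag is ignored.
os.environ["XLA_FLAGS"] = "--xla_force_host_platform_device_count=8"

import jax

# With the flag in effect, the CPU platform reports 8 devices,
# which lets sharding-aware tests run without real accelerators.
print(jax.devices())
assert jax.device_count() == 8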