Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pages to the ZIM and display them in the UI #28

Merged
merged 1 commit into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
docker build -t libretexts2zim .

- name: Run scraper
run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo"
run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --root-page-id 28207 --file-name-format "tests_en_libretexts-geo"

- name: Run integration test suite
run: docker run -v $PWD/scraper/tests-integration:/src/scraper/tests-integration -v $PWD/output:/output -e ZIM_FILE_PATH=/output/tests_en_libretexts-geo.zim libretexts2zim bash -c "pip install pytest; pytest -v /src/scraper/tests-integration"
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ See [README](README.md) for details about how to install with hatch virtualenv.

### Developing the ZIM UI in Vue.JS

When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. home.json, ...).
When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. shared.json, ...).

To simplify this, it is possible to:

Expand All @@ -24,10 +24,10 @@ To achieve this, first build the Docker image based on current code base.
docker build -t local-libretexts2zim .
```

Scrape a library (here we use the [GeoSciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).
Scrape a library (here we use the [Geosciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).

```
docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo"
docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" --overwrite
```

Extract interesting ZIM content and move it to `public` folder.
Expand Down
45 changes: 42 additions & 3 deletions scraper/src/libretexts2zim/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@

id: LibraryPageId
title: str
path: str
parent: "LibraryPage | None" = None
children: list["LibraryPage"] = []

def __repr__(self) -> str:
    """Return a compact, single-line description of the page.

    The parent is rendered by its id (or ``None`` when the page has no
    parent) and the children as a comma-separated list of their ids, so
    that printing a page never recurses through the whole tree.
    """
    # NOTE(review): the label says "WikiPage" while the surrounding code
    # constructs this class as LibraryPage - kept as-is, confirm whether the
    # label should be renamed.
    parent_id = "None" if not self.parent else self.parent.id
    children_ids = ",".join(child.id for child in self.children)
    return (
        f"WikiPage(id='{self.id}', title='{self.title}', path='{self.path}', "
        f"parent='{parent_id}', "
        f"children='{children_ids}')"
    )
Expand All @@ -52,6 +53,12 @@
return result


class LibraryPageContent(BaseModel):
"""Content of a given library page"""

html_body: str


class LibraryTree(BaseModel):
"""Class holding information about the tree of pages on a given library"""

Expand Down Expand Up @@ -246,14 +253,19 @@
)

root = LibraryPage(
id=tree_data["page"]["@id"], title=tree_data["page"]["title"]
id=tree_data["page"]["@id"],
title=tree_data["page"]["title"],
path=tree_data["page"]["path"]["#text"],
)
tree_obj = LibraryTree(root=root)
tree_obj.pages[root.id] = root

def _add_page(page_node: Any, parent: LibraryPage) -> LibraryPage:
page = LibraryPage(
id=page_node["@id"], title=page_node["title"], parent=parent
id=page_node["@id"],
title=page_node["title"],
path=page_node["path"]["#text"],
parent=parent,
)
parent.children.append(page)
tree_obj.pages[page.id] = page
Expand All @@ -274,6 +286,33 @@

return tree_obj

def get_page_content(self, page: LibraryPage) -> LibraryPageContent:
    """Return the content of a given page.

    Queries ``/pages/{page.id}/contents`` and checks that the ``body`` of the
    response has the only layout handled here: a string (used as the HTML
    body of the page) followed by a dict whose ``@target`` is ``toc``.

    Args:
        page: page whose content is requested; only its ``id`` is used.

    Returns:
        A LibraryPageContent whose ``html_body`` is the first ``body`` element.

    Raises:
        LibreTextsParsingError: if ``body`` is missing, too short, or does not
            have the expected layout.
    """
    endpoint = f"/pages/{page.id}/contents"
    tree = self._get_api_json(endpoint, timeout=HTTP_TIMEOUT_NORMAL_SECONDS)

    # Guard the indexing below so that a malformed response is reported as a
    # parsing error instead of a bare KeyError / IndexError / TypeError.
    body = tree.get("body") if isinstance(tree, dict) else None
    if not isinstance(body, list) or len(body) < 2:
        raise LibreTextsParsingError(
            f"Unexpected body of {endpoint}, at least two elements are expected"
        )

    html_body, toc = body[0], body[1]
    if not isinstance(html_body, str):
        raise LibreTextsParsingError(
            f"First body element of {endpoint} is not a string"
        )
    if not isinstance(toc, dict):
        raise LibreTextsParsingError(
            f"Second body element of {endpoint} is not a dict"
        )
    if "@target" not in toc:
        raise LibreTextsParsingError(
            f"Unexpected second body element of {endpoint}, "
            "no @target property"
        )
    # Bound to a local so the message below does not need to reuse double
    # quotes inside the f-string (only accepted from Python 3.12 onwards).
    target = toc["@target"]
    if target != "toc":
        raise LibreTextsParsingError(
            f"Unexpected second body element of {endpoint}, "
            f"@target property is '{target}' while only 'toc' "
            "is expected"
        )
    return LibraryPageContent(html_body=html_body)

Check warning on line 314 in scraper/src/libretexts2zim/client.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/client.py#L314

Added line #L314 was not covered by tests


def _get_soup(content: str) -> BeautifulSoup:
"""Return a BeautifulSoup soup from textual content
Expand Down
48 changes: 32 additions & 16 deletions scraper/src/libretexts2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
LibreTextsMetadata,
)
from libretexts2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger
from libretexts2zim.ui import ConfigModel, HomeModel, SharedModel
from libretexts2zim.ui import (
ConfigModel,
PageContentModel,
PageModel,
SharedModel,
)
from libretexts2zim.zimconfig import ZimConfig


Expand Down Expand Up @@ -260,20 +265,6 @@
stream_file(home.welcome_image_url, byte_stream=welcome_image)
add_item_for(creator, "content/logo.png", content=welcome_image.getvalue())
del welcome_image
add_item_for(
creator,
"content/shared.json",
content=SharedModel(logo_path="content/logo.png").model_dump_json(
by_alias=True
),
)
add_item_for(
creator,
"content/home.json",
content=HomeModel(
welcome_text_paragraphs=home.welcome_text_paragraphs
).model_dump_json(by_alias=True),
)

logger.info(f"Adding Vue.JS UI files in {self.zimui_dist}")
for file in self.zimui_dist.rglob("*"):
Expand All @@ -287,7 +278,8 @@
creator=creator,
path=path,
content=index_html_path.read_text(encoding="utf-8").replace(
"<title>Vite App</title>", formatted_config.title_format
"<title>Vite App</title>",
f"<title>{formatted_config.title_format}</title>",
),
mimetype="text/html",
is_front=True,
Expand All @@ -307,5 +299,29 @@
f"{len(selected_pages)} pages (out of {len(pages_tree.pages)}) will be "
"fetched and pushed to the ZIM"
)
add_item_for(

Check warning on line 302 in scraper/src/libretexts2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/processor.py#L302

Added line #L302 was not covered by tests
creator,
"content/shared.json",
content=SharedModel(
logo_path="content/logo.png",
root_page_path=selected_pages[0].path, # root is always first
pages=[
PageModel(id=page.id, title=page.title, path=page.path)
for page in selected_pages
],
).model_dump_json(by_alias=True),
)

logger.info("Fetching pages content")

Check warning on line 315 in scraper/src/libretexts2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/processor.py#L315

Added line #L315 was not covered by tests
for page in selected_pages:
logger.debug(f" Fetching {page.id}")
page_content = self.libretexts_client.get_page_content(page)
add_item_for(

Check warning on line 319 in scraper/src/libretexts2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/processor.py#L317-L319

Added lines #L317 - L319 were not covered by tests
creator,
f"content/page_content_{page.id}.json",
content=PageContentModel(
html_body=page_content.html_body
).model_dump_json(by_alias=True),
)

return zim_path
12 changes: 10 additions & 2 deletions scraper/src/libretexts2zim/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,20 @@ class CamelModel(BaseModel):
model_config = ConfigDict(alias_generator=camelize, populate_by_name=True)


class HomeModel(CamelModel):
welcome_text_paragraphs: list[str]
class PageModel(CamelModel):
id: str
title: str
path: str


class PageContentModel(CamelModel):
html_body: str


class SharedModel(CamelModel):
logo_path: str
root_page_path: str
pages: list[PageModel]


class ConfigModel(CamelModel):
Expand Down
5 changes: 5 additions & 0 deletions scraper/tests-integration/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ def test_get_home_welcome_text_paragraphs(
"""Ensures proper data is retrieved from home of libretexts"""

assert home.welcome_text_paragraphs == home_welcome_text_paragraphs


def test_get_home_page_content(client: LibreTextsClient, page_tree: LibraryTree):
    """Check that fetching the root page of the tree yields a non-empty HTML body"""
    root_content = client.get_page_content(page_tree.root)
    assert root_content.html_body
36 changes: 21 additions & 15 deletions scraper/tests-integration/test_zim_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,26 +56,22 @@ def test_zim_content_logo_png(zim_fh: Archive, home_png_size: int):
assert len(logo_png.content) == home_png_size # pyright: ignore


def test_zim_content_home_json(
zim_fh: Archive, home_welcome_text_paragraphs: list[str]
):
"""Ensure proper content at content/home.json"""

home_json = zim_fh.get_item("content/home.json")
assert home_json.mimetype == "application/json" # pyright: ignore
assert json.loads(bytes(home_json.content)) == { # pyright: ignore
"welcomeTextParagraphs": home_welcome_text_paragraphs
}


def test_zim_content_shared_json(zim_fh: Archive):
"""Ensure proper content at content/shared.json"""

shared_json = zim_fh.get_item("content/shared.json")
assert shared_json.mimetype == "application/json" # pyright: ignore
assert json.loads(bytes(shared_json.content)) == { # pyright: ignore
"logoPath": "content/logo.png"
}
shared_content = json.loads(bytes(shared_json.content)) # pyright: ignore
shared_content_keys = shared_content.keys()
assert "logoPath" in shared_content_keys
assert "rootPagePath" in shared_content_keys
assert "pages" in shared_content_keys
assert len(shared_content["pages"]) == 4
for page in shared_content["pages"]:
shared_content_page_keys = page.keys()
assert "id" in shared_content_page_keys
assert "title" in shared_content_page_keys
assert "path" in shared_content_page_keys


def test_zim_content_config_json(zim_fh: Archive):
Expand All @@ -86,3 +82,13 @@ def test_zim_content_config_json(zim_fh: Archive):
assert json.loads(bytes(config_json.content)) == { # pyright: ignore
"secondaryColor": "#FFFFFF"
}


@pytest.mark.parametrize("page_id", [28207, 28208, 28209, 28212])
def test_zim_content_page_content_json(page_id: int, zim_fh: Archive):
    """Ensure proper content at content/page_content_<page_id>.json"""

    # One JSON item is expected in the ZIM per page id listed above.
    item = zim_fh.get_item(f"content/page_content_{page_id}.json")
    assert item.mimetype == "application/json"  # pyright: ignore
    page_content_keys = json.loads(bytes(item.content)).keys()  # pyright: ignore
    assert "htmlBody" in page_content_keys
38 changes: 25 additions & 13 deletions scraper/tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,42 @@

@pytest.fixture(scope="module")
def library_tree() -> LibraryTree:
root = LibraryPage(id="24", title="Home page")
topic1 = LibraryPage(id="25", title="1: First topic", parent=root)
root = LibraryPage(id="24", title="Home page", path="")
topic1 = LibraryPage(
id="25", title="1: First topic", path="1_First_Topic", parent=root
)
root.children.append(topic1)
topic1_1 = LibraryPage(id="26", title="1.1: Cloud", parent=topic1)
topic1_1 = LibraryPage(id="26", title="1.1: Cloud", path="1.1_Cloud", parent=topic1)
topic1.children.append(topic1_1)
topic1_2 = LibraryPage(id="27", title="1.2: Tree", parent=topic1)
topic1_2 = LibraryPage(id="27", title="1.2: Tree", path="1.2_Tree", parent=topic1)
topic1.children.append(topic1_2)
topic1_3 = LibraryPage(id="28", title="1.3: Bees", parent=topic1)
topic1_3 = LibraryPage(id="28", title="1.3: Bees", path="1.3_Bees", parent=topic1)
topic1.children.append(topic1_3)
topic2 = LibraryPage(id="29", title="2: Second topic", parent=root)
topic2 = LibraryPage(
id="29", title="2: Second topic", path="2_Second_Topic", parent=root
)
root.children.append(topic2)
topic2_1 = LibraryPage(id="30", title="2.1: Underground", parent=topic2)
topic2_1 = LibraryPage(
id="30", title="2.1: Underground", path="2.1_Underground", parent=topic2
)
topic2.children.append(topic2_1)
topic2_2 = LibraryPage(id="31", title="2.2: Lava", parent=topic2)
topic2_2 = LibraryPage(id="31", title="2.2: Lava", path="2.2_Lava", parent=topic2)
topic2.children.append(topic2_2)
topic2_3 = LibraryPage(id="32", title="2.3: Volcano", parent=topic2)
topic2_3 = LibraryPage(
id="32", title="2.3: Volcano", path="2.3_Volcano", parent=topic2
)
topic2.children.append(topic2_3)
topic3 = LibraryPage(id="33", title="3: Third topic", parent=root)
topic3 = LibraryPage(
id="33", title="3: Third topic", path="3_Third_Topic", parent=root
)
root.children.append(topic3)
topic3_1 = LibraryPage(id="34", title="3.1: Ground", parent=topic3)
topic3_1 = LibraryPage(
id="34", title="3.1: Ground", path="3.1_Ground", parent=topic3
)
topic3.children.append(topic3_1)
topic3_2 = LibraryPage(id="35", title="3.2: Earth", parent=topic3)
topic3_2 = LibraryPage(id="35", title="3.2: Earth", path="3.2_Earth", parent=topic3)
topic3.children.append(topic3_2)
topic3_3 = LibraryPage(id="36", title="3.3: Sky", parent=topic3)
topic3_3 = LibraryPage(id="36", title="3.3: Sky", path="3.3_Sky", parent=topic3)
topic3.children.append(topic3_3)
return LibraryTree(
root=root,
Expand Down
14 changes: 8 additions & 6 deletions zimui/cypress/e2e/home.cy.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
describe('Home of the ZIM UI', () => {
beforeEach(() => {
cy.intercept('GET', '/content/home.json', { fixture: 'home.json' }).as('getHome')
cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared')
cy.intercept('GET', '/content/config.json', { fixture: 'config.json' }).as('getConfig')
cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared')
cy.intercept('GET', '/content/page_content_123.json', { fixture: 'page_content_123.json' }).as(
'getPage'
)
cy.visit('/')
cy.wait('@getHome')
cy.wait('@getShared')
cy.wait('@getConfig')
cy.wait('@getShared')
cy.wait('@getPage')
})

it('loads the proper header image', () => {
Expand All @@ -17,8 +19,8 @@ describe('Home of the ZIM UI', () => {
})

it('loads the first paragraph only once', () => {
cy.contains('p', 'Paragraph 2').should('be.visible')
cy.get('p:contains("Paragraph 2")').should('have.length', 1)
cy.contains('p', 'Paragraph 1').should('be.visible')
cy.get('p:contains("Paragraph 1")').should('have.length', 1)
})

it('loads the second paragraph only once', () => {
Expand Down
1 change: 0 additions & 1 deletion zimui/cypress/fixtures/home.json

This file was deleted.

3 changes: 3 additions & 0 deletions zimui/cypress/fixtures/page_content_123.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"htmlBody": "<p>Paragraph 1</p><p>Paragraph 2</p>"
}
12 changes: 11 additions & 1 deletion zimui/cypress/fixtures/shared.json
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
{ "logoPath": "content/logo.png" }
{
"logoPath": "content/logo.png",
"rootPagePath": "a_folder/a_page",
"pages": [
{
"id": "123",
"title": "A page title",
"path": "a_folder/a_page"
}
]
}
1 change: 1 addition & 0 deletions zimui/public/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
content
2 changes: 1 addition & 1 deletion zimui/src/components/__tests__/HeaderBar.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ describe('HeaderBar', () => {
})
const main = useMainStore()
const logoPath = 'content/logo.png'
main.shared = { logoPath: logoPath }
main.shared = { logoPath: logoPath, rootPagePath: '', pages: [] }

const wrapper = mount(HeaderBar, {
global: {
Expand Down
2 changes: 1 addition & 1 deletion zimui/src/router/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const router = createRouter({
history: createWebHashHistory(),
routes: [
{
path: '/',
path: '/:pathMatch(.*)',
name: 'home',
component: HomeView
}
Expand Down
Loading