Skip to content

Commit

Permalink
feat: automate text direction extracting from CLDR
Browse files Browse the repository at this point in the history
This fills in the data for all CLDR languages, making issues like #1477
less likely to happen.
  • Loading branch information
nijel committed Sep 30, 2024
1 parent 826f086 commit af29b70
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 4 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

all: weblate_language_data/languages.py weblate_language_data/plural_tags.py PLURALS_DIFF.md $(wildcard weblate_language_data/locale/*/LC_MESSAGES/django.po) $(filter-out $(patsubst modules/cldr-json/cldr-json/cldr-localenames-full/main/%/languages.json,languages-po/%.po,$(wildcard modules/cldr-json/cldr-json/cldr-localenames-full/main/*/languages.json)),languages-po/en.po)

weblate_language_data/languages.py: languages.csv aliases.csv cldr.csv extraplurals.csv default_countries.csv population.csv qt.csv $(wildcard modules/iso-codes/data/iso_*.json) scripts/generate-language-data
weblate_language_data/languages.py: languages.csv aliases.csv cldr.csv extraplurals.csv default_countries.csv population.csv qt.csv rtl.csv $(wildcard modules/iso-codes/data/iso_*.json) scripts/generate-language-data
./scripts/generate-language-data

PLURALS_DIFF.md: languages.csv cldr.csv gettext.csv l10n-guide.csv translate.csv scripts/list-diff
Expand All @@ -14,6 +14,9 @@ PLURALS_DIFF.md: languages.csv cldr.csv gettext.csv l10n-guide.csv translate.csv
cldr.csv: modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json scripts/export-cldr
./scripts/export-cldr

# Refresh the list of right-to-left language codes from the CLDR layout data.
# scripts/export-cldr-orientation reads the existing rtl.csv first and only
# adds codes to it, and it only considers codes present in languages.csv.
rtl.csv: modules/cldr-json/cldr-json/cldr-misc-full/main/*/layout.json scripts/export-cldr-orientation languages.csv
	./scripts/export-cldr-orientation

qt.csv: modules/qttools/src/linguist/shared/numerus.cpp scripts/export-qt languages.csv
./scripts/export-qt

Expand Down
53 changes: 53 additions & 0 deletions rtl.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
code
ae
aii
ajp
apc
ar
ar_BH
ar_DZ
ar_EG
ar_KW
ar_LY
ar_MA
ar_SA
ar_YE
ara
arc
ave
bal
bgn
bqi
ckb
ckb_IR
dv
egy
fa
fa_AF
fas
ha
he
heb
khw
ks
lrc
luz
ms_Arab
mzn
nqo
pal
per
phn
ps
rhg
sam
sd
sdh
skr
syc
syr
ug
ur
ur_IN
urd
yi
45 changes: 45 additions & 0 deletions scripts/export-cldr-orientation
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#! /usr/bin/env python3

# Copyright © Michal Čihař <[email protected]>
#
# SPDX-License-Identifier: MIT

import json
from pathlib import Path
import csv

# Input/output locations, relative to the directory the script is run from
LANGUAGES_FILE = "languages.csv"
RTL_FILE = "rtl.csv"
LAYOUTDIR = Path("modules/cldr-json/cldr-json/cldr-misc-full/main/")


def read_codes(filename):
    """
    Return the set of values found in the first column of a CSV file.

    The first row is treated as a header and skipped. Empty rows (for
    example a trailing blank line) are ignored instead of raising
    IndexError, and an entirely empty file yields an empty set.
    """
    with open(filename, encoding="utf-8", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        next(reader, None)
        return {row[0] for row in reader if row}


def collect_rtl_codes(layout_dir, language_codes, rtl_codes):
    """
    Add right-to-left languages found in CLDR layout files to rtl_codes.

    Every ``*/layout.json`` below layout_dir is inspected. Locale keys are
    converted from the ``xx-YY`` form to ``xx_YY`` and are only considered
    when present in language_codes. The rtl_codes set is updated in place
    and also returned.
    """
    # sorted() keeps any diagnostics in a stable order between runs
    for layout_file in sorted(layout_dir.glob("*/layout.json")):
        # Read explicitly as UTF-8 rather than the locale's default encoding
        data = json.loads(layout_file.read_text(encoding="utf-8"))
        for key, value in data["main"].items():
            code = key.replace("-", "_")
            if code not in language_codes:
                continue
            character_order = value["layout"]["orientation"]["characterOrder"]
            if character_order == "right-to-left":
                rtl_codes.add(code)
            elif character_order != "left-to-right":
                print(f"Unknown order for {code}: {character_order}")
    return rtl_codes


def main():
    """Merge the CLDR right-to-left languages into rtl.csv."""
    language_codes = read_codes(LANGUAGES_FILE)
    # Start from the codes already listed, so existing entries are kept
    rtl_codes = read_codes(RTL_FILE)
    collect_rtl_codes(LAYOUTDIR, language_codes, rtl_codes)

    # Show the resulting code set on stdout
    print(rtl_codes)

    with open(RTL_FILE, "w", encoding="utf-8") as handle:
        handle.write("code\n")
        for code in sorted(rtl_codes):
            handle.write(f"{code}\n")


if __name__ == "__main__":
    main()
17 changes: 16 additions & 1 deletion scripts/generate-language-data
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,19 @@ with open("cldr.csv") as csvfile:
if existing != number:
CLDRPLURALS.append((code, LANGUAGE_NAMES[code], number, equation))

# Read extra plurals
# Read default countries
with open("default_countries.csv") as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    # The first row is skipped (presumably a header - confirm against the file)
    next(reader)
    DEFAULT_COUNTRIES = list(reader)

# Read RTL
# rtl.csv has a single "code" column below its header row
with open("rtl.csv") as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    next(reader)
    RTLS = list(reader)
# csv.reader yields an empty list for a blank line; skip those rows so a
# stray empty line in rtl.csv does not raise IndexError
RTL_CODES = {lang[0] for lang in RTLS if lang}

# Write language definitions
with open("weblate_language_data/languages.py", "w") as output:
output.write(HEADER)
Expand Down Expand Up @@ -195,6 +202,13 @@ with open("weblate_language_data/countries.py", "w") as output:
for row in DEFAULT_COUNTRIES:
output.write(" '{}',\n".format(*row))
output.write(")\n")
# Generate weblate_language_data/rtl.py: the shared header followed by a
# set literal listing the RTL codes in sorted order, one per line
with open("weblate_language_data/rtl.py", "w") as output:
    output.write(HEADER)
    output.write("# List of RTL languages\n")
    output.write("RTL_LANGS = {\n")
    output.writelines(f' "{rtl_code}",\n' for rtl_code in sorted(RTL_CODES))
    output.write("}\n")

# Generate same check blacklist
words = set()
Expand Down Expand Up @@ -322,6 +336,7 @@ subprocess.run(
"pre-commit",
"run",
"--files",
"weblate_language_data/rtl.py",
"weblate_language_data/countries.py",
"weblate_language_data/aliases.py",
"weblate_language_data/plurals.py",
Expand Down
30 changes: 28 additions & 2 deletions weblate_language_data/rtl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,43 @@
#
# SPDX-License-Identifier: MIT

"""
Language data definitions.
This is an automatically generated file, see scripts/generate-language-data
Do not edit, please adjust language definitions in following repository:
https://github.com/WeblateOrg/language-data
"""
# pylint: disable=line-too-long,too-many-lines

# List of RTL languages
RTL_LANGS = {
"ae",
"aii",
"ajp",
"apc",
"ar",
"ar_BH",
"ar_DZ",
"ar_EG",
"ar_KW",
"ar_LY",
"ar_MA",
"ar_SA",
"ar_YE",
"ara",
"arc",
"ae",
"aii",
"ave",
"bal",
"bgn",
"bqi",
"ckb",
"ckb_IR",
"dv",
"egy",
"fa",
"fa_AF",
"fas",
"ha",
"he",
Expand All @@ -26,18 +48,22 @@
"lrc",
"luz",
"ms_Arab",
"mzn",
"nqo",
"pal",
"per",
"phn",
"ps",
"rhg",
"sam",
"sd",
"sdh",
"skr",
"syc",
"syr",
"ug",
"ur",
"ur_IN",
"urd",
"yi",
}

0 comments on commit af29b70

Please sign in to comment.