Skip to content

Commit

Permalink
Merge pull request #24 from t-sagara/v2_dev
Browse files (browse the repository at this point in the history)
Merge v2_dev
  • Loading branch information
t-sagara authored Apr 13, 2024
2 parents 9ddf144 + 16168a5 commit a8ef20f
Show file tree
Hide file tree
Showing 10 changed files with 472 additions and 554 deletions.
2 changes: 2 additions & 0 deletions flask-demo/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
jageocoder
Flask-Cors
2 changes: 1 addition & 1 deletion jageocoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
>>> jageocoder.searchNode('<Japanese-address>')
"""

__version__ = '2.1.2' # The package version
__version__ = '2.1.4' # The package version
__dictionary_version__ = '20230927' # Compatible dictionary version
__author__ = 'Takeshi Sagara <[email protected]>'

Expand Down
158 changes: 31 additions & 127 deletions jageocoder/aza_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,80 +26,15 @@ class AzaMaster(BaseTable):
Standardized names for retrieval
aza_class: int
町字区分コード
1:大字・町, 2:丁目, 3:小字
pref: str
都道府県名
pref_kana: str
都道府県名_カナ
pref_eng: str
都道府県名_英字
county: str
郡名
county_kana: str
郡名_カナ
county_eng: str
郡名_英字
city: str
市区町村名
city_kana: str
市区町村名_カナ
city_eng: str
市区町村名_英字
ward: str
政令市区名
ward_kana: str
政令市区名_カナ
ward_eng: str
政令市区名_英字
oaza: str
大字・町名
oaza_kana: str
大字・町名_カナ
oaza_eng: str
大字・町名_英字
chome: str
丁目名
chome_kana: str
丁目名_カナ
chome_num: str
丁目名_数字
koaza: str
小字名
koaza_kana: str
小字名_カナ
koaza_eng: str
小字名_英字
1:大字・町, 2:丁目, 3:小字, 4:なし, 5:道路方式の道路名
is_jukyo: bool
住居表示フラグ
jukyo_code: int
住居表示方式コード
1:街区方式, 2:道路方式, 0:住居表示でない
is_oaza_alias: bool
大字・町_通称フラグ
is_koaza_alias: bool
小字_通称フラグ
is_oaza_gaiji: bool
大字・町_外字フラグ
is_koaza_gaiji: bool
小字_外字フラグ
status: int
状態フラグ
0:自治体確認待ち, 1:地方自治法の町字に該当, 2:地方自治法の町字に非該当, 3:不明
1:住居表示実施, 0:住居表示非実施, 2:実施・非実施区域が併存
start_count_type: int
起番フラグ
1:起番, 2:非起番, 0:登記情報に存在しない
valid_from: date
効力発生日
valid_to: date
廃止日
reference_code: int
原典資料コード
1:自治体資料, 11:位置参照情報・自治体資料, 12:位置参照情報・街区レベル,
13:位置参照情報・1/2500地形図, 10:位置参照情報・その他資料, 0:その他資料
postcode: str
郵便番号(セミコロン区切り)
note: str
備考
"""

__tablename__ = "aza_master"
Expand All @@ -123,44 +58,13 @@ class AzaMaster(BaseTable):
def from_csvrow(self, row: dict) -> dict:
names = self.get_names_from_csvrow(row)
aza_master_row = {
"code": row["全国地方公共団体コード"][0:5] + row["町字id"],
"code": row["lg_code"][0:5] + row["machiaza_id"],
"names": json.dumps(names, ensure_ascii=False),
"namesIndex": self.__class__.standardize_aza_name(names),
"azaClass": row.get("町字区分コード"),
# "pref": row["都道府県名"],
# "pref_kana": row.get("都道府県名_カナ", ""),
# "pref_eng": row.get("都道府県名_英字", ""),
# "county": row.get("郡名", ""),
# "county_kana": row.get("郡名_カナ", ""),
# "county_eng": row.get("郡名_英字", ""),
# "city": row.get("市区町村名", ""),
# "city_kana": row.get("市区町村名_カナ", ""),
# "city_eng": row.get("市区町村名_英字", ""),
# "ward": row.get("政令市区名", ""),
# "ward_kana": row.get("政令市区名_カナ", ""),
# "ward_eng": row.get("政令市区名_英字", ""),
# "oaza": row.get("大字・町名", ""),
# "oaza_kana": row.get("大字・町名_カナ", ""),
# "oaza_eng": row.get("大字・町名_英字", ""),
# "chome": row.get("丁目名", ""),
# "chome_kana": row.get("丁目名_カナ", ""),
# "chome_num": row.get("丁目名_数字", ""),
# "koaza": row.get("小字名", ""),
# "koaza_kana": row.get("小字名_カナ", ""),
# "koaza_eng": row.get("小字名_英字", ""),
"isJukyo": row.get("住居表示フラグ", "") == "1",
# "jukyo_code": row.get("住居表示方式コード"),
# "is_oaza_alias": row.get("大字・町_通称フラグ", "") == "1",
# "is_koaza_alias": row.get("小字_通称フラグ", "") == "1",
# "is_oaza_gaiji": row.get("大字・町_外字フラグ", "") == "1",
# "is_koaza_gaiji": row.get("小字_外字フラグ", "") == "1",
# "status": row.get("状態フラグ"),
"startCountType": row.get("起番フラグ"),
# "valid_from": row.get("効力発生日"),
# "valid_to": row.get("廃止日"),
# "reference_code": row.get("原典資料コード"),
"postcode": row.get("郵便番号"),
# "note": row.get("備考"),
"azaClass": row.get("machiaza_type"),
"isJukyo": row.get("rsdt_addr_flg", "") == "1",
"startCountType": row.get("wake_num_flg"),
"postcode": row.get("post_code"),
}
for key in ("azaClass", "jukyoCode", "status",
"startCountType", "referenceCode",):
Expand All @@ -187,75 +91,75 @@ def from_csvrow(self, row: dict) -> dict:
return aza_master_row

def get_names_from_csvrow(self, row: dict) -> list:
code = row["全国地方公共団体コード"][0:5] + row["町字id"]
code = row["lg_code"][0:5] + row["machiaza_id"]
names = []
pref = row['都道府県名']
pref = row["pref"]
if pref:
names.append([
AddressLevel.PREF,
pref,
row['都道府県名_カナ'],
row['都道府県名_英字'],
row["pref_kana"],
row["pref_roma"],
code[0:2]])

county = row['郡名']
county = row["county"]
if county:
names.append([
AddressLevel.COUNTY,
county,
row['郡名_カナ'],
row['郡名_英字'],
row["county_kana"],
row["county_roma"],
code[0:3]])

city = row['市区町村名']
ward = row['政令市区名']
city = row["city"]
ward = row["ward"]
if ward:
names.append([
AddressLevel.CITY,
city,
row['市区町村名_カナ'],
row['市区町村名_英字'],
row["city_kana"],
row["city_roma"],
code[0:3]])

names.append([
AddressLevel.WARD,
ward,
row['政令市区名_カナ'],
row['政令市区名_英字'],
row["ward_kana"],
row["ward_roma"],
code[0:5]])
else:
names.append([
AddressLevel.CITY,
city,
row['市区町村名_カナ'],
row['市区町村名_英字'],
row["city_kana"],
row["city_roma"],
code[0:5]])

oaza = row['大字・町名']
oaza = row["oaza_cho"]
if oaza:
names.append([
AddressLevel.OAZA,
oaza,
row['大字・町名_カナ'],
row['大字・町名_英字'],
row["oaza_cho_kana"],
row["oaza_cho_roma"],
code[0:9]])

chome = row['丁目名']
chome = row["chome"]
if chome:
names.append([
AddressLevel.AZA,
chome,
row['丁目名_カナ'],
row['丁目名_数字'] + 'chome',
row["chome_kana"],
row["chome_number"] + 'chome',
code])

aza = row['小字名']
aza = row["koaza"]
if aza:
names.append([
AddressLevel.AZA,
aza,
row['小字名_カナ'],
row['小字名_英字'],
row["koaza_kana"],
row["koaza_roma"],
code])

return names
Expand Down
9 changes: 5 additions & 4 deletions jageocoder/itaiji.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ def set_options(self, options: dict):
# Patterns that cannot be omitted as AZA names
hyphens = re.escape(strlib.hyphen)
self.re_not_ommisible_aza_patterns = re.compile(
'(' +
r'([^。、,.0-9a-zA-Z\t\n\r\f\v]{,15}?)(' +
rf'{numbers}[条線丁区番号{hyphens}]|' +
rf'[{self.chiban_heads}]{numbers}|' +
rf'{numbers}$' +
')'
r')'
)

# Patterns that do not follow behind nodes at that level
Expand Down Expand Up @@ -475,11 +475,12 @@ def optional_aza_len(self, string: str, pos: int = 0) -> int:
int
Number of characters that can be omitted.
"""
m = self.re_not_ommisible_aza_patterns.search(string[pos:])
m = self.re_not_ommisible_aza_patterns.match(string[pos:])
if m is None:
return 0

n = string[pos:].find(m.group(0))
n = len(m.group(1))
# n = string[pos:].find(m.group(0))
return n

candidates = []
Expand Down
Loading

0 comments on commit a8ef20f

Please sign in to comment.