Skip to content

Commit

Permalink
add facility to import grantmaking segments from ukgrantmaking
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Oct 4, 2024
1 parent 9e76592 commit 52b0c54
Showing 1 changed file with 120 additions and 0 deletions.
120 changes: 120 additions & 0 deletions charity/management/commands/import_ukg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import io

from django.utils.text import slugify
from openpyxl import load_workbook

from ftc.management.commands._base_scraper import BaseScraper
from ftc.models import OrganisationClassification, Vocabulary, VocabularyEntries

UKG = "ukg"


class Command(BaseScraper):
name = "ukgrantmaking"
allowed_domains = ["ukgrantmaking.org"]
start_urls = [
"https://www.ukgrantmaking.org/wp-content/uploads/2024/06/uk-grantmaking-all-grantmakers-2022-23.xlsx",
]
source = {
"title": "UK Grantmaking Segments",
"description": "",
"identifier": "ukcat",
"license": "http://creativecommons.org/licenses/by/4.0/",
"license_name": "Creative Commons Attribution 4.0 International License",
"issued": "",
"modified": "",
"publisher": {
"name": "360Giving",
"website": "https://www.threesixtygiving.org/",
},
"distribution": [
{
"downloadURL": "",
"accessURL": "https://www.ukgrantmaking.org/report/2024/methodology-data/#Full%20data",
"title": "UK Grantmaking - Methodology and data",
}
],
}
vocab = {
UKG: {
"title": "UK Grantmaking Segment",
"description": """Categories and segments from UK Grantmaking, the definitive annual publication on grant funding in the UK.
It is a unique cross-sector collaboration between 360Giving, the Association of Charitable Foundations, the Association of Charitable Organisations, UK Community Foundations and London Funders.
Find out more at [ukgrantmaking.org](https://www.ukgrantmaking.org/).
""",
},
}
models_to_delete = [OrganisationClassification]

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.intialise_vocabulary()

def intialise_vocabulary(self):
self.logger.info("Fetching ICNPTSO")
vocab, _ = Vocabulary.objects.update_or_create(
slug=UKG,
defaults=dict(
description=self.vocab[UKG]["description"],
title=self.vocab[UKG]["title"],
single=False,
),
)
self.vocab_obj = vocab
VocabularyEntries.objects.filter(vocabulary=vocab).update(current=False)
self.vocab_cache = {}

def parse_file(self, response, source_url):
wb = load_workbook(io.BytesIO(response.content), read_only=True)
latest_sheet = wb.active
headers = []
for k, row in enumerate(latest_sheet.rows):
if not row[0].value:
continue

if not headers:
headers = [c.value.lower() for c in row]
else:
record = dict(zip(headers, [c.value for c in row]))
self.parse_row(record)

def parse_row(self, record):
segment = record.get("segment")
category = record.get("category")

if (category, segment) in self.vocab_cache:
vocabulary_id = self.vocab_cache[(category, segment)]
else:
category_obj, _ = VocabularyEntries.objects.update_or_create(
vocabulary=self.vocab_obj,
code=slugify(category),
parent=None,
defaults=dict(
title=category,
current=True,
),
)
segment_obj, _ = VocabularyEntries.objects.update_or_create(
vocabulary=self.vocab_obj,
code=slugify(category) + "-" + slugify(segment),
parent=category_obj,
defaults=dict(
title=segment,
current=True,
),
)
vocabulary_id = segment_obj.id
self.vocab_cache[(category, segment)] = vocabulary_id

self.add_record(
OrganisationClassification,
{
"org_id": record["org_id"],
"vocabulary_id": vocabulary_id,
"spider": self.name,
"source": self.source,
"scrape": self.scrape,
},
)

0 comments on commit 52b0c54

Please sign in to comment.