-
Notifications
You must be signed in to change notification settings - Fork 1
/
csv_to_concepts_scheme.py
149 lines (119 loc) · 4.33 KB
/
csv_to_concepts_scheme.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#imports
from fileinput import close
from json.tool import main
from platform import node
from re import sub
from termios import ICRNL
from types import new_class
from uuid import uuid1, uuid4
import uuid
import csv
from django.core.management.base import BaseCommand, CommandError
from numpy import append
from arches.app.models.concept import Concept, ConceptValue
import pandas as pd
class Command(BaseCommand):
    """
    Description:
    This command takes a .csv file and creates one Concept Scheme per column:
    the column header becomes the scheme and every value below it becomes a
    Concept attached to that scheme. A collection is then made from each scheme.

    Example:
    header1,header2,header3
    data1,data2,data3
    data1,data2,data3
    data1,data2,data3

    Parameters:
    '-s': path to .csv

    Returns:
    Saves concept schemes to database
    """

    def add_arguments(self, parser):
        """Register the '-s' / '--source' option that holds the path to the input .csv."""
        parser.add_argument(
            "-s",
            "--source",
            action="store",
            dest="file_path",
            default="",
            help="File path to the csv whose columns are turned into Concept Schemes",
        )

    def handle(self, *args, **options):
        """
        Read the .csv given with '-s' and save a Concept Scheme for each column.

        Raises:
        CommandError: if no path was supplied or the file does not exist
        """
        # Load CSV filepath; the option defaults to "" so reject it before pandas sees it
        csv_path = options["file_path"]
        if not csv_path:
            raise CommandError("No csv file given, pass its path with -s/--source")

        # Get data to process: column headers and, per header, the values beneath it
        try:
            headers, values = get_data_and_header(csv_path)
        except FileNotFoundError as err:
            raise CommandError("csv file not found: {}".format(csv_path)) from err

        # headers and values are parallel lists, one entry per csv column
        for header, column_values in zip(headers, values):
            # Create the parent concept for this header
            main_concept = create_concept("ConceptScheme", header)
            main_concept.save()

            # Create a concept for each item in the column and attach them all to the parent
            main_concept.subconcepts = [
                create_concept("Concept", str(value), main_concept)
                for value in column_values
            ]

            # Save parent concept (now carrying its subconcepts) to db
            main_concept.save()

            # Create a collection from parent concept
            main_concept.make_collection()
def create_concept(concept_type, data, parent_concept = None):
    '''
    Descriptions:
    Build a Concept of the requested type, save it, and give it an English
    'prefLabel' value holding the supplied text.

    Parameters:
    :concept_type: string stored as the new concept's nodetype
    :data: string stored as the concept's legacyoid and as its label value
    :parent_concept: optional Concept; when given, a stub of it is fetched by id
                     and appended to the new concept's parentconcepts

    Returns:
    :c: the newly created Concept (with the label appended to its values)
    '''
    # Base concept: id and conceptid each receive their own freshly generated uuid1
    concept = Concept()
    concept.id = str(uuid.uuid1())
    concept.nodetype = concept_type
    concept.conceptid = str(uuid.uuid1())
    concept.nodetype_id = concept_type
    concept.legacyoid = data
    concept.save()

    if not parent_concept:
        # No parent supplied: this concept is itself the parent
        concept.relationshiptype = ""
        concept.hassubconcepts = True
    else:
        # Parent supplied: fetch a bare copy of it (no sub/parent/related concepts)
        parent_stub = Concept().get(
            id=parent_concept.id,
            include_subconcepts=False,
            include_parentconcepts=False,
            include_relatedconcepts=False,
            depth_limit=None,
            up_depth_limit=None,
        )
        parent_stub.relationshiptype = "hasTopConcept"
        concept.parentconcepts.append(parent_stub)
        concept.relationshiptype = "hasTopConcept"

    # Label value for the concept, saved on its own before being attached
    label = ConceptValue()
    label.conceptid = concept.id
    label.type = "prefLabel"
    label.category = "label"
    label.value = data
    label.language = "en"
    label.save()
    concept.values.append(label)

    return concept
def get_data_and_header(csv_path):
    '''
    Description:
    Function to read a CSV file and return its column headers together with,
    for each column, the list of non-empty values found under that header

    Parameters:
    :csv_path: Filepath to the csv file

    Returns:
    :tuple: (headers, values) where headers is the list of column names and
            values is a parallel list holding one list of strings per column
    '''
    # Read every cell as plain text. Without this pandas infers types: empty
    # cells turn into NaN (ending up as a "nan" label), the literal text "NA"
    # is treated as missing, and integer columns with gaps become floats.
    csv_df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)

    # Cast dataframe column names to list
    headers = list(csv_df.columns)

    # Populate a list for each column, skipping cells that hold no data
    values = [
        [cell for cell in csv_df[col] if pd.notna(cell) and cell != ""]
        for col in headers
    ]

    return (headers, values)