diff --git a/.gitignore b/.gitignore
index 4c5a89e..d2f30f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,3 +64,15 @@ utils/fragility_utils/data/
 
 # token
 *incoretoken*
+
+# gis data
+utils/geoserver_utils/data/
+utils/gis_utils/*.dbf
+utils/gis_utils/*.prj
+utils/gis_utils/*.shp
+utils/gis_utils/*.shx
+utils/gis_utils/*.geobuf
+utils/gis_utils/*.geojson
+utils/gis_utils/*.json
+utils/gis_utils/*.csv
+utils/gis_utils/*.geoparquet
\ No newline at end of file
diff --git a/utils/geoserver_utils/change_workspace_in_data_directory.py b/utils/geoserver_utils/change_workspace_in_data_directory.py
new file mode 100644
index 0000000..20eaa7b
--- /dev/null
+++ b/utils/geoserver_utils/change_workspace_in_data_directory.py
@@ -0,0 +1,207 @@
+"""Move a GeoServer store from one workspace to another inside a data directory.
+
+The script works directly on the data directory: it moves the store's catalog
+and data folders, then rewrites the workspace/namespace ids and file URLs in
+the store and layer XML files. All steps run as soon as the module is executed,
+using the paths configured below.
+"""
+import os
+import shutil
+import xml.etree.ElementTree as ET
+
+# Define paths
+data_dir = r"C:/Users/ywkim/Downloads/geoserver-2.22.4-bin/data_dir"
+old_workspace = "incore"  # Existing workspace
+new_workspace = "ywkim"  # New workspace
+# datastore_name = "66ce2d4ccdfc8b4e6928de45"  # The datastore or coverage store name
+datastore_name = "66cf726a81983b19154d60f7"
+
+# Paths for old and new workspace directories
+old_workspace_path = os.path.join(data_dir, "workspaces", old_workspace, datastore_name)
+new_workspace_path = os.path.join(data_dir, "workspaces", new_workspace, datastore_name)
+new_workspace_xml_path = os.path.join(data_dir, "workspaces", new_workspace, "workspace.xml")
+new_namespace_xml_path = os.path.join(data_dir, "workspaces", new_workspace, "namespace.xml")
+
+# Paths for old and new data directories
+old_data_path = os.path.join(data_dir, "data", old_workspace, datastore_name)
+new_data_path = os.path.join(data_dir, "data", new_workspace, datastore_name)
+
+
+def _read_xml_id(xml_path):
+    """Return the text of the <id> element directly under the root of an XML file.
+
+    Raises:
+        ValueError: if the root has no <id> child or the element is empty.
+    """
+    id_element = ET.parse(xml_path).getroot().find("id")
+    if id_element is None or not id_element.text:
+        raise ValueError(f"No <id> element found in {xml_path}")
+    return id_element.text
+
+
+# Function to get the workspace ID from the workspace.xml file
+def get_workspace_id(workspace_xml_path):
+    """Return the workspace id stored in a workspace.xml file."""
+    return _read_xml_id(workspace_xml_path)
+
+
+# Function to get the namespace ID from the namespace.xml file
+def get_namespace_id(namespace_xml_path):
+    """Return the namespace id stored in a namespace.xml file."""
+    return _read_xml_id(namespace_xml_path)
+
+
+def _move_if_absent(source, destination, label):
+    """Move source to destination unless destination already exists.
+
+    Returns:
+        True if destination exists afterwards, False if there was nothing to move.
+    """
+    if os.path.exists(destination):
+        print(f"{label} already exists at {destination}. No move needed.")
+        return True
+    if not os.path.exists(source):
+        print(f"{label} not found at {source}. Nothing to move.")
+        return False
+    shutil.move(source, destination)
+    print(f"Moved {label.lower()} from {source} to {destination}.")
+    return True
+
+
+# Step 2: Move the datastore/coveragestore directory from the old workspace to the new workspace
+def move_datastore_or_coveragestore():
+    """Move the store's catalog folder and data folder to the new workspace.
+
+    Raises:
+        FileNotFoundError: if the store folder exists in neither workspace.
+    """
+    if not _move_if_absent(old_workspace_path, new_workspace_path, "Store"):
+        raise FileNotFoundError(f"Store directory not found: {old_workspace_path}")
+    # A missing data folder is reported but does not abort the run, so the
+    # XML of an already moved store still gets updated.
+    _move_if_absent(old_data_path, new_data_path, "Data")
+
+
+# Step 3: Edit the datastore.xml or coveragestore.xml with the new workspace ID and update the path
+def update_store_xml():
+    """Update whichever store descriptor exists in the moved store folder."""
+    datastore_xml_path = os.path.join(new_workspace_path, "datastore.xml")
+    coveragestore_xml_path = os.path.join(new_workspace_path, "coveragestore.xml")
+
+    if os.path.exists(datastore_xml_path):
+        update_datastore_xml(datastore_xml_path)
+    elif os.path.exists(coveragestore_xml_path):
+        update_coveragestore_xml(coveragestore_xml_path)
+    else:
+        print(f"Neither datastore.xml nor coveragestore.xml found in {new_workspace_path}.")
+
+
+def update_datastore_xml(datastore_xml_path):
+    """Point a datastore.xml at the new workspace, namespace and data folder."""
+    # Get the new workspace ID
+    new_workspace_id = get_workspace_id(new_workspace_xml_path)
+
+    tree = ET.parse(datastore_xml_path)
+    root = tree.getroot()
+
+    # Update the workspace ID
+    workspace = root.find(".//workspace/id")
+    if workspace is not None:
+        workspace.text = new_workspace_id
+
+    # Update the namespace URI
+    namespace = root.find(".//entry[@key='namespace']")
+    if namespace is not None:
+        namespace.text = f"http://{new_workspace}"
+
+    # Update the datastore's URL to point to the new path
+    url_entry = root.find(".//entry[@key='url']")
+    if url_entry is not None:
+        corrected_datastore_path = new_data_path.replace("\\", "/")
+        url_entry.text = f"file:/{corrected_datastore_path}/"
+
+    # Save the updated XML
+    tree.write(datastore_xml_path)
+    print(f"Updated {datastore_xml_path} with new workspace ID and namespace.")
+
+
+def update_coveragestore_xml(coveragestore_xml_path):
+    """Point a coveragestore.xml at the new workspace and data folder."""
+    # Get the new workspace ID
+    new_workspace_id = get_workspace_id(new_workspace_xml_path)
+
+    tree = ET.parse(coveragestore_xml_path)
+    root = tree.getroot()
+
+    # Update the workspace ID
+    workspace = root.find(".//workspace/id")
+    if workspace is not None:
+        workspace.text = new_workspace_id
+
+    # Update the coveragestore's URL to point to the new relative path.
+    # An empty <url> has no file name to carry over, so it is left untouched.
+    url_entry = root.find(".//url")
+    if url_entry is not None and url_entry.text:
+        relative_path = os.path.join("data", new_workspace, datastore_name, os.path.basename(url_entry.text))
+        relative_path = relative_path.replace("\\", "/")
+        url_entry.text = f"file:{relative_path}"
+
+    # Save the updated XML
+    tree.write(coveragestore_xml_path)
+    print(f"Updated {coveragestore_xml_path} with new workspace ID and path.")
+
+
+# Step 4: Edit all layers' featuretype.xml or coverage.xml with the new namespace ID
+def update_layer_xml():
+    """Update the namespace id of every layer folder inside the moved store."""
+    # Get the new namespace ID
+    new_namespace_id = get_namespace_id(new_namespace_xml_path)
+
+    for layer_dir in os.listdir(new_workspace_path):
+        layer_path = os.path.join(new_workspace_path, layer_dir)
+        # The store folder also holds plain files such as datastore.xml;
+        # only sub-folders can contain a layer definition.
+        if not os.path.isdir(layer_path):
+            continue
+
+        featuretype_xml_path = os.path.join(layer_path, "featuretype.xml")
+        coverage_xml_path = os.path.join(layer_path, "coverage.xml")
+
+        if os.path.exists(featuretype_xml_path):
+            update_featuretype_xml(featuretype_xml_path, new_namespace_id)
+        elif os.path.exists(coverage_xml_path):
+            update_coverage_xml(coverage_xml_path, new_namespace_id)
+        else:
+            print(f"Neither featuretype.xml nor coverage.xml found in {layer_dir}.")
+
+
+def _update_namespace_id(xml_path, new_namespace_id):
+    """Set the <namespace><id> of a layer XML file, if present, and save the file."""
+    tree = ET.parse(xml_path)
+
+    # Update the namespace ID
+    namespace = tree.getroot().find(".//namespace/id")
+    if namespace is not None:
+        namespace.text = new_namespace_id
+
+    # Save the updated XML
+    tree.write(xml_path)
+    print(f"Updated {xml_path} with new namespace ID.")
+
+
+def update_featuretype_xml(featuretype_xml_path, new_namespace_id):
+    """Update the namespace id in a layer's featuretype.xml."""
+    _update_namespace_id(featuretype_xml_path, new_namespace_id)
+
+
+def update_coverage_xml(coverage_xml_path, new_namespace_id):
+    """Update the namespace id in a layer's coverage.xml."""
+    _update_namespace_id(coverage_xml_path, new_namespace_id)
+
+
+# Run the steps
+move_datastore_or_coveragestore()
+update_store_xml()
+update_layer_xml()
+
+print("Store and layers updated successfully.")
diff --git a/utils/geoserver_utils/migrate_workspace.py b/utils/geoserver_utils/migrate_workspace.py
new file mode 100644
index 0000000..9a3ef9a
--- /dev/null
+++ b/utils/geoserver_utils/migrate_workspace.py
@@ -0,0 +1,178 @@
+"""Create a GeoServer workspace for the owner of each dataset stored in MongoDB."""
+import os
+from xml.sax.saxutils import escape
+
+import requests
+from pymongo import MongoClient
+from requests.auth import HTTPBasicAuth
+
+# Configuration for GeoServer and MongoDB.
+# The connection settings can be overridden with environment variables of the
+# same name; the defaults are the values that used to be hard-coded.
+GEOSERVER_URL = os.environ.get("GEOSERVER_URL", "http://localhost:8081/geoserver/rest")
+GEOSERVER_USER = os.environ.get("GEOSERVER_USER", "admin")
+GEOSERVER_PASSWORD = os.environ.get("GEOSERVER_PASSWORD", "geoserver")
+MONGODB_URI = os.environ.get("MONGODB_URI", "mongodb://localhost:27017")
+DATABASE_NAME = "datadb"
+COLLECTION_NAME = "Dataset"
+# Seconds to wait for a GeoServer response; without a timeout requests can
+# block indefinitely on an unresponsive server.
+REQUEST_TIMEOUT = 30
+
+
+# Function to create a new workspace in GeoServer
+def create_workspace(workspace_name):
+    """Create a workspace in GeoServer and print the outcome.
+
+    A 409 response is reported as "already exists" rather than as a failure.
+    """
+    headers = {'Content-type': 'text/xml'}
+    # The request is declared as text/xml, so send a <workspace> document and
+    # escape the name so characters such as & or < cannot break it.
+    data = f"<workspace><name>{escape(workspace_name)}</name></workspace>"
+    url = f"{GEOSERVER_URL}/workspaces"
+
+    response = requests.post(url, auth=(GEOSERVER_USER, GEOSERVER_PASSWORD), headers=headers, data=data,
+                             timeout=REQUEST_TIMEOUT)
+    if response.status_code == 201:
+        print(f"Workspace '{workspace_name}' created successfully.")
+    elif response.status_code == 409:
+        print(f"Workspace '{workspace_name}' already exists.")
+    else:
+        print(f"Failed to create workspace '{workspace_name}': {response.content}")
+
+
+# Function to move a layer to a new workspace
+def move_layer_to_workspace(layer_name, source_workspace, target_workspace):
+    """PUT the layer's target-workspace name to GeoServer and print the outcome."""
+    headers = {'Content-type': 'text/xml'}
+    url = f"{GEOSERVER_URL}/layers/{source_workspace}:{layer_name}"
+
+    # Update the target workspace for the layer
+    # NOTE(review): the body is declared as text/xml but is a bare
+    # "workspace:layer" string - confirm the payload GeoServer expects here.
+    data = f"{target_workspace}:{layer_name}"
+    response = requests.put(url, auth=(GEOSERVER_USER, GEOSERVER_PASSWORD), headers=headers, data=data,
+                            timeout=REQUEST_TIMEOUT)
+
+    if response.status_code == 200:
+        print(f"Layer '{layer_name}' moved to workspace '{target_workspace}'.")
+    else:
+        print(f"Failed to move layer '{layer_name}': {response.content}")
+
+
+# def change_workspace(source_workspace, target_workspace, dataset_id):
+#     # Get datastore information
+#     datastore_info = get_datastore(source_workspace, dataset_id)
+#
+#     if datastore_info:
+#         # Move the datastore to the new workspace
+#         datastore_info['dataStore']['workspace'] = {'name': target_workspace}
+#         update_datastore(target_workspace, dataset_id, datastore_info)
+#
+#         # Move the layer to the new workspace
+#         update_layer(target_workspace, dataset_id)
+#
+#     else:
+#         print("Datastore not found or error occurred.")
+
+
+def get_datastore(workspace, datastore):
+    """Return the datastore's JSON description, or None if the request fails."""
+    url = f'{GEOSERVER_URL}/workspaces/{workspace}/datastores/{datastore}.json'
+    response = requests.get(url, auth=HTTPBasicAuth(GEOSERVER_USER, GEOSERVER_PASSWORD), timeout=REQUEST_TIMEOUT)
+    if response.status_code == 200:
+        return response.json()
+    else:
+        print(f"Failed to get datastore. Status code: {response.status_code}")
+        return None
+
+
+# Function to update the datastore to a new workspace
+def update_datastore(workspace, datastore, datastore_info):
+    """PUT a datastore description to the given workspace and print the outcome.
+
+    The 'workspace' field is left out of the request. The caller's
+    datastore_info is not modified.
+    """
+    # remove workspace field from the datastore_info (on a copy, so the
+    # caller's dict stays intact)
+    store = {key: value for key, value in datastore_info['dataStore'].items() if key != 'workspace'}
+    payload = {**datastore_info, 'dataStore': store}
+
+    url = f'{GEOSERVER_URL}/workspaces/{workspace}/datastores/{datastore}.json'
+    headers = {'Content-Type': 'application/json'}
+    response = requests.put(url, auth=HTTPBasicAuth(GEOSERVER_USER, GEOSERVER_PASSWORD),
+                            headers=headers, json=payload, timeout=REQUEST_TIMEOUT)
+    if response.status_code in [200, 201]:
+        print(f"Successfully moved datastore '{datastore}' to workspace '{workspace}'.")
+    else:
+        print(f"Failed to move datastore. Status code: {response.status_code} Response: {response.text}")
+
+
+def update_layer(workspace, layer):
+    """PUT a workspace assignment for the layer and print the outcome."""
+    url = f'{GEOSERVER_URL}/layers/{layer}.json'
+    layer_info = {
+        "layer": {
+            "workspace": {
+                "name": workspace
+            }
+        }
+    }
+    headers = {'Content-Type': 'application/json'}
+    response = requests.put(url, auth=HTTPBasicAuth(GEOSERVER_USER, GEOSERVER_PASSWORD),
+                            headers=headers, json=layer_info, timeout=REQUEST_TIMEOUT)
+    if response.status_code in [200, 201]:
+        print(f"Successfully moved layer '{layer}' to workspace '{workspace}'.")
+    else:
+        print(f"Failed to move layer. Status code: {response.status_code}")
+
+
+def main():
+    """Create a GeoServer workspace for the owner of every dataset in MongoDB."""
+    source_workspace = 'incore'
+
+    # There are two possible approaches:
+    # the first one is to use the space db and the dataset ids that belong to each user's space;
+    # the second one is to iterate the datasets, check the owner field and then move
+    # the layer to the user's workspace.
+
+    # use the second method
+    the_method = 2
+
+    # Connect to MongoDB
+    client = MongoClient(MONGODB_URI)
+    try:
+        collection = client[DATABASE_NAME][COLLECTION_NAME]
+        known_owners = set()
+
+        for dataset in collection.find():
+            dataset_id = str(dataset['_id'])
+            owner = dataset.get('owner')
+            if not owner:
+                print(f"Dataset '{dataset_id}' has no owner. Skipped.")
+                continue
+
+            # Create a new workspace for the user, once per owner
+            if owner not in known_owners:
+                create_workspace(owner)
+                known_owners.add(owner)
+
+            # this is a test for the first way, and it needs to be changed
+            # Move the dataset's layer to the user's workspace. The layer is
+            # addressed by the whole dataset id; looping over the id string
+            # would send one request per character.
+            if the_method == 1:
+                move_layer_to_workspace(dataset_id, source_workspace, owner)
+
+            # in here, testing the second way now.
+            # if the_method == 2:
+            #     change_workspace(source_workspace, owner, dataset_id)
+    finally:
+        # close the MongoDB connection, even if a request above fails
+        client.close()
+
+
+if __name__ == "__main__":
+    main()