Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) Create a script to migrate the geoserver layers to the workspace with each user name #77

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,15 @@ utils/fragility_utils/data/

# token
*incoretoken*

# gis data
utils/geoserver_utils/data/
utils/gis_utils/*.dbf
utils/gis_utils/*.prj
utils/gis_utils/*.shp
utils/gis_utils/*.shx
utils/gis_utils/*.geobuf
utils/gis_utils/*.geojson
utils/gis_utils/*.json
utils/gis_utils/*.csv
utils/gis_utils/*.geoparquet
169 changes: 169 additions & 0 deletions utils/geoserver_utils/change_workspace_in_data_directory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import os
import shutil
import xml.etree.ElementTree as ET

# --- Migration settings ---------------------------------------------------
# Root of the GeoServer data directory that is modified in place.
data_dir = r"C:/Users/ywkim/Downloads/geoserver-2.22.4-bin/data_dir"
# Workspace the store currently lives in, and the workspace it is moved to.
old_workspace, new_workspace = "incore", "ywkim"
# Name of the datastore or coverage store to migrate.
# Alternative value kept from earlier runs: "66ce2d4ccdfc8b4e6928de45"
datastore_name = "66cf726a81983b19154d60f7"

# --- Derived locations ----------------------------------------------------
_workspaces_root = os.path.join(data_dir, "workspaces")
_new_workspace_root = os.path.join(_workspaces_root, new_workspace)
_data_root = os.path.join(data_dir, "data")

# Store configuration directory in the old and in the new workspace.
old_workspace_path = os.path.join(_workspaces_root, old_workspace, datastore_name)
new_workspace_path = os.path.join(_new_workspace_root, datastore_name)

# XML descriptors of the new workspace (read to obtain its IDs).
new_workspace_xml_path = os.path.join(_new_workspace_root, "workspace.xml")
new_namespace_xml_path = os.path.join(_new_workspace_root, "namespace.xml")

# Data directory of the store under the old and under the new workspace.
old_data_path = os.path.join(_data_root, old_workspace, datastore_name)
new_data_path = os.path.join(_data_root, new_workspace, datastore_name)


# Function to get the workspace ID from the workspace.xml file
def get_workspace_id(workspace_xml_path):
    """Return the text of the ``<id>`` child of a workspace.xml root element.

    Args:
        workspace_xml_path: Path of the XML file to read.

    Returns:
        The text content of the ``<id>`` element directly under the root.

    Raises:
        ValueError: If the root has no ``<id>`` child or the element is empty.
    """
    root = ET.parse(workspace_xml_path).getroot()
    workspace_id = root.findtext("id")
    if not workspace_id:
        # Name the offending file instead of failing later with an opaque
        # AttributeError / writing an empty ID into the store configuration.
        raise ValueError(f"No <id> element found in {workspace_xml_path}")
    return workspace_id


# Function to get the namespace ID from the namespace.xml file
def get_namespace_id(namespace_xml_path):
    """Return the text of the ``<id>`` child of a namespace.xml root element.

    Args:
        namespace_xml_path: Path of the XML file to read.

    Returns:
        The text content of the ``<id>`` element directly under the root.

    Raises:
        ValueError: If the root has no ``<id>`` child or the element is empty.
    """
    root = ET.parse(namespace_xml_path).getroot()
    namespace_id = root.findtext("id")
    if not namespace_id:
        # Name the offending file instead of failing later with an opaque
        # AttributeError / writing an empty ID into the layer configuration.
        raise ValueError(f"No <id> element found in {namespace_xml_path}")
    return namespace_id


# Step 2: Move the datastore/coveragestore directory from the old workspace to the new workspace
def move_datastore_or_coveragestore():
    """Move the store's configuration and data directories to the new workspace.

    Each directory is moved only when its destination does not exist yet, so
    the function can be re-run after a completed migration.

    Raises:
        FileNotFoundError: If a directory still has to be moved but its source
            does not exist. This is checked for both directories before
            anything is moved, so a missing data directory can no longer leave
            the store configuration moved on its own.
    """
    moves = (
        ("store", old_workspace_path, new_workspace_path),
        ("data", old_data_path, new_data_path),
    )

    # Validate everything first: failing half-way would leave the data
    # directory in a partially migrated state.
    for kind, source, destination in moves:
        if not os.path.exists(destination) and not os.path.exists(source):
            raise FileNotFoundError(
                f"Cannot move {kind}: {source} does not exist."
            )

    for kind, source, destination in moves:
        if os.path.exists(destination):
            print(f"{kind.capitalize()} already exists at {destination}. No move needed.")
            continue
        shutil.move(source, destination)
        print(f"Moved {kind} from {source} to {destination}.")


# Step 3: Edit the datastore.xml or coveragestore.xml with the new workspace ID and update the path
def update_store_xml():
    """Update whichever store descriptor exists in the new store directory.

    ``datastore.xml`` is looked for first, then ``coveragestore.xml``; the
    first one found is handed to its updater. If neither exists a message is
    printed and nothing is changed.
    """
    updaters = (
        ("datastore.xml", update_datastore_xml),
        ("coveragestore.xml", update_coveragestore_xml),
    )
    for file_name, updater in updaters:
        descriptor_path = os.path.join(new_workspace_path, file_name)
        if os.path.exists(descriptor_path):
            updater(descriptor_path)
            return

    print(f"Neither datastore.xml nor coveragestore.xml found in {new_workspace_path}.")


def update_datastore_xml(datastore_xml_path):
    """Point a datastore.xml at the new workspace, namespace and data path.

    Elements that are not present in the file are left alone; the file is
    rewritten in place.

    Args:
        datastore_xml_path: Path of the datastore.xml file to modify.
    """
    # Get the new workspace ID
    new_workspace_id = get_workspace_id(new_workspace_xml_path)

    tree = ET.parse(datastore_xml_path)
    root = tree.getroot()

    # Update the workspace ID
    workspace = root.find(".//workspace/id")
    if workspace is not None:
        workspace.text = new_workspace_id

    # Update the namespace URI
    # NOTE(review): assumes the new workspace's namespace URI is
    # "http://<workspace name>" - confirm against its namespace.xml.
    namespace = root.find(".//entry[@key='namespace']")
    if namespace is not None:
        namespace.text = f"http://{new_workspace}"

    # Update the datastore's URL to point to the new path
    url_entry = root.find(".//entry[@key='url']")
    if url_entry is not None:
        corrected_datastore_path = new_data_path.replace("\\", "/")
        # A drive-letter path ("C:/...") needs a slash after "file:"; a path
        # that already starts with "/" must not get a second one, otherwise
        # "file://var/..." would be read as host "var".
        prefix = "file:" if corrected_datastore_path.startswith("/") else "file:/"
        url_entry.text = f"{prefix}{corrected_datastore_path}/"

    # Save the updated XML
    tree.write(datastore_xml_path)
    print(f"Updated {datastore_xml_path} with new workspace ID and namespace.")


def update_coveragestore_xml(coveragestore_xml_path):
    """Point a coveragestore.xml at the new workspace and relative data path.

    The ``<url>`` is rebuilt as ``file:data/<new workspace>/<store>/<file>``,
    keeping only the file name of the existing URL. The file is rewritten in
    place.

    Args:
        coveragestore_xml_path: Path of the coveragestore.xml file to modify.
    """
    # Get the new workspace ID
    new_workspace_id = get_workspace_id(new_workspace_xml_path)

    tree = ET.parse(coveragestore_xml_path)
    root = tree.getroot()

    # Update the workspace ID
    workspace = root.find(".//workspace/id")
    if workspace is not None:
        workspace.text = new_workspace_id

    # Update the coveragestore's URL to point to the new relative path.
    # An empty <url/> has no file name to carry over, so it is left untouched
    # rather than crashing in basename() on None.
    url_entry = root.find(".//url")
    if url_entry is not None and url_entry.text:
        file_name = os.path.basename(url_entry.text)
        relative_path = os.path.join("data", new_workspace, datastore_name, file_name)
        relative_path = relative_path.replace("\\", "/")
        url_entry.text = f"file:{relative_path}"

    # Save the updated XML
    tree.write(coveragestore_xml_path)
    print(f"Updated {coveragestore_xml_path} with new workspace ID and path.")


# Step 4: Edit all layers' featuretype.xml or coverage.xml with the new namespace ID
def update_layer_xml():
    """Write the new namespace ID into every layer of the migrated store.

    Each sub-directory of the new store directory is treated as a layer and
    its ``featuretype.xml`` (checked first) or ``coverage.xml`` is updated.
    Plain files in the store directory, such as the store descriptor itself,
    are skipped.
    """
    # Get the new namespace ID
    new_namespace_id = get_namespace_id(new_namespace_xml_path)

    for layer_dir in os.listdir(new_workspace_path):
        layer_path = os.path.join(new_workspace_path, layer_dir)
        if not os.path.isdir(layer_path):
            # e.g. datastore.xml / coveragestore.xml: not a layer, and would
            # otherwise trigger a misleading "not found" message below.
            continue

        featuretype_xml_path = os.path.join(layer_path, "featuretype.xml")
        coverage_xml_path = os.path.join(layer_path, "coverage.xml")

        if os.path.exists(featuretype_xml_path):
            update_featuretype_xml(featuretype_xml_path, new_namespace_id)
        elif os.path.exists(coverage_xml_path):
            update_coverage_xml(coverage_xml_path, new_namespace_id)
        else:
            print(f"Neither featuretype.xml nor coverage.xml found in {layer_dir}.")


def update_featuretype_xml(featuretype_xml_path, new_namespace_id):
    """Replace the namespace ID stored in a featuretype.xml file.

    Args:
        featuretype_xml_path: Path of the featuretype.xml file to modify.
        new_namespace_id: Text to store in the ``namespace/id`` element.

    The file is rewritten only when a ``namespace/id`` element exists;
    otherwise a message is printed and the file is left untouched, so the
    output never claims an update that did not happen.
    """
    tree = ET.parse(featuretype_xml_path)
    namespace = tree.getroot().find(".//namespace/id")

    if namespace is None:
        print(f"No namespace ID found in {featuretype_xml_path}. File left unchanged.")
        return

    # Update the namespace ID and save the updated XML
    namespace.text = new_namespace_id
    tree.write(featuretype_xml_path)
    print(f"Updated {featuretype_xml_path} with new namespace ID.")


def update_coverage_xml(coverage_xml_path, new_namespace_id):
    """Replace the namespace ID stored in a coverage.xml file.

    Args:
        coverage_xml_path: Path of the coverage.xml file to modify.
        new_namespace_id: Text to store in the ``namespace/id`` element.

    The file is rewritten only when a ``namespace/id`` element exists;
    otherwise a message is printed and the file is left untouched, so the
    output never claims an update that did not happen.
    """
    tree = ET.parse(coverage_xml_path)
    namespace = tree.getroot().find(".//namespace/id")

    if namespace is None:
        print(f"No namespace ID found in {coverage_xml_path}. File left unchanged.")
        return

    # Update the namespace ID and save the updated XML
    namespace.text = new_namespace_id
    tree.write(coverage_xml_path)
    print(f"Updated {coverage_xml_path} with new namespace ID.")


# Run the steps in order: the XML updates read from the new store directory,
# so the move has to come first.
for _step in (
    move_datastore_or_coveragestore,
    update_store_xml,
    update_layer_xml,
):
    _step()

print("Store and layers updated successfully.")
143 changes: 143 additions & 0 deletions utils/geoserver_utils/migrate_workspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import os

import requests
from pymongo import MongoClient
from requests.auth import HTTPBasicAuth

# Configuration for GeoServer and MongoDB.
# The connection settings can be overridden with environment variables of the
# same name, so real credentials do not have to be edited into this file.
# The defaults are the original local-development values.
GEOSERVER_URL = os.environ.get("GEOSERVER_URL", "http://localhost:8081/geoserver/rest")
GEOSERVER_USER = os.environ.get("GEOSERVER_USER", "admin")
GEOSERVER_PASSWORD = os.environ.get("GEOSERVER_PASSWORD", "geoserver")
MONGODB_URI = os.environ.get("MONGODB_URI", "mongodb://localhost:27017")
DATABASE_NAME = "datadb"
COLLECTION_NAME = "Dataset"


# Function to create a new workspace in GeoServer
def create_workspace(workspace_name):
    """Ask GeoServer to create a workspace and print the outcome.

    Args:
        workspace_name: Name of the workspace to create.

    A 201 response is reported as created, 409 as already existing, and any
    other status as a failure together with the response body.
    """
    # Imported here so the block does not depend on a file-level import.
    from xml.sax.saxutils import escape

    headers = {'Content-type': 'text/xml'}
    # Escape the name: it is interpolated into XML, and characters such as
    # "&" or "<" would otherwise produce a malformed (or altered) request body.
    data = f"<workspace><name>{escape(str(workspace_name))}</name></workspace>"
    url = f"{GEOSERVER_URL}/workspaces"

    response = requests.post(url, auth=(GEOSERVER_USER, GEOSERVER_PASSWORD), headers=headers, data=data)
    if response.status_code == 201:
        print(f"Workspace '{workspace_name}' created successfully.")
    elif response.status_code == 409:
        print(f"Workspace '{workspace_name}' already exists.")
    else:
        print(f"Failed to create workspace '{workspace_name}': {response.content}")


# Function to move a layer to a new workspace
def move_layer_to_workspace(layer_name, source_workspace, target_workspace):
    """Send a PUT for ``source_workspace:layer_name`` and print the outcome.

    The request body sets the layer's ``defaultStyle`` name to
    ``target_workspace:layer_name``; a 200 response is reported as a
    successful move, anything else as a failure with the response body.

    NOTE(review): the payload only names a default style and never references
    the layer's workspace, so this may not actually relocate the layer -
    confirm against the GeoServer REST documentation.
    """
    style_reference = f"{target_workspace}:{layer_name}"
    payload = f"<layer><defaultStyle><name>{style_reference}</name></defaultStyle></layer>"

    response = requests.put(
        f"{GEOSERVER_URL}/layers/{source_workspace}:{layer_name}",
        auth=(GEOSERVER_USER, GEOSERVER_PASSWORD),
        headers={'Content-type': 'text/xml'},
        data=payload,
    )

    if response.status_code != 200:
        print(f"Failed to move layer '{layer_name}': {response.content}")
        return
    print(f"Layer '{layer_name}' moved to workspace '{target_workspace}'.")


# def change_workspace(source_workspace, target_workspace, dataset_id):
# # Get datastore information
# datastore_info = get_datastore(source_workspace, dataset_id)
#
# if datastore_info:
# # Move the datastore to the new workspace
# datastore_info['dataStore']['workspace'] = {'name': target_workspace}
# update_datastore(target_workspace, dataset_id, datastore_info)
#
# # Move the layer to the new workspace
# update_layer(target_workspace, dataset_id)
#
# else:
# print("Datastore not found or error occurred.")


def get_datastore(workspace, datastore):
    """Fetch a datastore's JSON description from GeoServer.

    Args:
        workspace: Name of the workspace that holds the datastore.
        datastore: Name of the datastore.

    Returns:
        The decoded JSON body on a 200 response; otherwise ``None`` after
        printing the status code.
    """
    credentials = HTTPBasicAuth(GEOSERVER_USER, GEOSERVER_PASSWORD)
    response = requests.get(
        f'{GEOSERVER_URL}/workspaces/{workspace}/datastores/{datastore}.json',
        auth=credentials,
    )

    if response.status_code != 200:
        print(f"Failed to get datastore. Status code: {response.status_code}")
        return None
    return response.json()


# Function to update the datastore to a new workspace
def update_datastore(workspace, datastore, datastore_info):
    """PUT a datastore description to the given workspace and print the outcome.

    Args:
        workspace: Workspace name used in the request URL.
        datastore: Datastore name used in the request URL.
        datastore_info: Dict with a ``'dataStore'`` entry, sent as JSON with
            its ``'workspace'`` field left out. The dict passed in is not
            modified.
    """
    # Build the payload on a copy so the caller's dict keeps its
    # 'workspace' field.
    store_fields = {
        key: value
        for key, value in datastore_info['dataStore'].items()
        if key != 'workspace'
    }
    payload = {**datastore_info, 'dataStore': store_fields}

    url = f'{GEOSERVER_URL}/workspaces/{workspace}/datastores/{datastore}.json'
    headers = {'Content-Type': 'application/json'}
    response = requests.put(url, auth=HTTPBasicAuth(GEOSERVER_USER, GEOSERVER_PASSWORD), headers=headers, json=payload)
    if response.status_code in [200, 201]:
        print(f"Successfully moved datastore '{datastore}' to workspace '{workspace}'.")
    else:
        print(f"Failed to move datastore. Status code: {response.status_code} Response: {response.text}")


def update_layer(workspace, layer):
    """PUT a layer document naming the given workspace and print the outcome.

    Args:
        workspace: Workspace name placed in the request body.
        layer: Layer name used in the request URL.

    A 200 or 201 response is reported as success; any other status is
    reported as a failure with its status code.
    """
    layer_info = {"layer": {"workspace": {"name": workspace}}}

    response = requests.put(
        f'{GEOSERVER_URL}/layers/{layer}.json',
        auth=HTTPBasicAuth(GEOSERVER_USER, GEOSERVER_PASSWORD),
        headers={'Content-Type': 'application/json'},
        json=layer_info,
    )

    if response.status_code not in [200, 201]:
        print(f"Failed to move layer. Status code: {response.status_code}")
    else:
        print(f"Successfully moved layer '{layer}' to workspace '{workspace}'.")


def main():
    """Create one GeoServer workspace per dataset owner found in MongoDB.

    Every document of the configured collection is read; for each distinct
    ``owner`` value a workspace of that name is requested once. Documents
    without an owner are reported and skipped. The MongoDB connection is
    closed even if an error interrupts the loop.

    Moving the layers themselves is not implemented yet (see TODO below).
    """
    # Connect to MongoDB
    client = MongoClient(MONGODB_URI)
    try:
        collection = client[DATABASE_NAME][COLLECTION_NAME]

        # Owners whose workspace has already been requested in this run, so
        # GeoServer is not asked again for every dataset of the same user.
        handled_owners = set()

        for dataset in collection.find():
            owner = dataset.get('owner')
            if not owner:
                print(f"Dataset '{dataset.get('_id')}' has no owner. Skipping.")
                continue

            # Create a new workspace for the user
            if owner not in handled_owners:
                create_workspace(owner)
                handled_owners.add(owner)

            # TODO: move the dataset's layer (named by str(dataset['_id']))
            # from the 'incore' workspace to the owner's workspace.
            # There are two possible ways to find what to move:
            #   1. use the space db and the dataset ids that belong to each
            #      user's space;
            #   2. iterate over the datasets, check the owner field and move
            #      the layer to that user's workspace (the approach this loop
            #      follows).
            # The earlier experiment for approach 1 iterated over the
            # characters of the dataset id string and was never executed, so
            # it is not kept here.
    finally:
        # close the MongoDB connection
        client.close()


if __name__ == "__main__":
    main()