Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor EnhancerScripts ItemEnhancerEntityType ItemEnhancerByDate #440

Open
wants to merge 1 commit into
base: main-cris
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.enhancer.script;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.UUID;

import org.apache.commons.cli.ParseException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.dspace.content.Item;
import org.dspace.content.enhancer.service.ItemEnhancerService;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.EntityTypeService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.core.exception.SQLRuntimeException;
import org.dspace.discovery.SearchUtils;
import org.dspace.discovery.SolrSearchCore;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.scripts.DSpaceRunnable;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.util.UUIDUtils;
import org.dspace.utils.DSpace;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Script that allows to enhance items, also forcing the updating of the
* calculated metadata with the enhancement.
*
* @author Luca Giamminonni (luca.giamminonni at 4science.it)

* This script uses the Solr search to discover the subset of entities to be processed.
* This offers extended functionality, e.g. enhancing only items modified since or between
* timestamps, which cannot easily be expressed with a database query.
* - dateupper/datelower: filter query for items between dates on the lastModified date
* - entity: filter query for the entity type (search.entitytype)
* - collection: filter query for the collection (location.coll)
* - query: free hand search query, e.g. -cris.virtual.author:* . Best to use some criteria on already enhanced items
* - max: process at most max items. Useful for testing.
* - limit: split the result into smaller lists containing at most limit entries and commit after each list,
* to avoid one big commit in the database. The collection/entity options are added as additional filter queries.
*
* @author [email protected]
*
*/
public class ItemEnhancerByDateScript
extends DSpaceRunnable<ItemEnhancerByDateScriptConfiguration<ItemEnhancerByDateScript>> {

private ItemService itemService;
private CollectionService collectionService;

private ItemEnhancerService itemEnhancerService;

protected SolrSearchCore solrSearchCore;

private boolean force;
private UUID collection;
private String entitytype;

private String query;

private String dateupper;

private String datelower;

private Context context;

private int max;

private int limit;

private int counter = 0;

private int countertotal = 0;

private EntityTypeService entityTypeService;

private static final Logger log = LoggerFactory.getLogger(ItemEnhancerByDateScript.class);

@Override
public void setup() throws ParseException {

this.itemService = ContentServiceFactory.getInstance().getItemService();
this.collectionService = ContentServiceFactory.getInstance().getCollectionService();
this.entityTypeService = ContentServiceFactory.getInstance().getEntityTypeService();
itemEnhancerService = new DSpace().getSingletonService(ItemEnhancerService.class);
this.solrSearchCore =
DSpaceServicesFactory.getInstance().getServiceManager().getServicesByType(SolrSearchCore.class).get(0);

this.force = commandLine.hasOption('f');
if (commandLine.hasOption('c')) {
this.collection = UUIDUtils.fromString(commandLine.getOptionValue('c').trim());
}
if (commandLine.hasOption('e')) {
this.entitytype = commandLine.getOptionValue('e').trim();
}
if (commandLine.hasOption('q')) {
this.query = commandLine.getOptionValue('q').trim();
}
if (commandLine.hasOption('d')) {
this.dateupper = commandLine.getOptionValue('d').trim();

Check warning on line 118 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L118

Added line #L118 was not covered by tests
}
if (commandLine.hasOption('s')) {
this.datelower = commandLine.getOptionValue('s').trim();

Check warning on line 121 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L121

Added line #L121 was not covered by tests
}
if (commandLine.hasOption('m')) {
try {
this.max = Integer.parseInt(commandLine.getOptionValue('m').trim());
} catch (Exception e) {
handler.logError(e.getMessage());

Check warning on line 127 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L126-L127

Added lines #L126 - L127 were not covered by tests
}
}
if (commandLine.hasOption('l')) {
try {
this.limit = Integer.parseInt(commandLine.getOptionValue('l').trim());
} catch (Exception e) {
handler.logError(e.getMessage());

Check warning on line 134 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L133-L134

Added lines #L133 - L134 were not covered by tests
}
}
}

@Override
public void internalRun() throws Exception {
context = new Context();
assignCurrentUserInContext();
assignSpecialGroupsInContext();
if (commandLine.hasOption('e') && Objects.isNull(entityTypeService.findByEntityType(context, entitytype))) {
throw new Exception("unknown EntityType " + entitytype);
}
if (commandLine.hasOption('c') && (Objects.isNull(collection)
|| Objects.isNull(this.collectionService.find(context, collection)))) {
throw new Exception("specified Collection does not exist");
}
SolrPingResponse ping = solrSearchCore.getSolr().ping();
if (ping.getStatus() > 299) {
throw new Exception("Solr seems not to be available. Status" + ping.getStatus());

Check warning on line 153 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L153

Added line #L153 was not covered by tests
}

context.turnOffAuthorisationSystem();
try {
searchItems();
context.complete();
handler.logInfo("Enhancement completed with success");
} catch (Exception e) {
handler.handleException("An error occurs during enhancement. The process is aborted", e);
context.abort();
} finally {
context.restoreAuthSystemState();
}
}


private void searchItems() {
int maximum = 0; //maximum items to be processed
int total = 0; //results of search/query
List<String> items = new ArrayList<>();
try {
SolrDocumentList results = searchItemsInSolr(this.query, this.dateupper, this.datelower);
for (SolrDocument doc : results) {
String resourceid = (String) doc.getFieldValue(SearchUtils.RESOURCE_ID_FIELD);
if (Objects.nonNull(resourceid) && Objects.nonNull(UUIDUtils.fromString(resourceid))) {
items.add(resourceid);
}
}
} catch (SolrServerException | IOException e) {
handler.logError(e.getMessage(), e);
log.error(e.getMessage());

Check warning on line 184 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L182-L184

Added lines #L182 - L184 were not covered by tests
}
total = items.size();
if (total == 0) {
handler.logInfo("No results in solr-Query");
log.info("No results in solr-Query");
return;
} else if (this.max > 0) {
maximum = this.max;
if (this.max < items.size()) {
items = items.subList(0, (this.max - 1));
total = this.max - 1;

Check warning on line 195 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L194-L195

Added lines #L194 - L195 were not covered by tests
}
}

// split list and commit after limit entries
if (this.limit > 0) {
if (limit > total) {
limit = total;

Check warning on line 202 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L202

Added line #L202 was not covered by tests
}
// counting variables for pagination
int tempcounter = 0;
int start = 0;
int end = 0;
while (tempcounter < total) {
start = tempcounter;
end = tempcounter + limit;
if (end > total) {
end = total;
limit = total - tempcounter;
}
try {
this.itemService.findByIds(context, items.subList(start, end)).forEachRemaining(this::enhanceItem);
tempcounter += limit;
context.commit();
handler.logInfo("enhanced " + tempcounter + " out of max " + maximum + " items");
log.info("enhanced " + tempcounter + " out of max " + maximum + " items");
} catch (Exception e) {
tempcounter += limit;
handler.logError(e.getMessage());
handler.logInfo("enhanced " + tempcounter + " out of max " + maximum + " items");
log.info("enhanced " + tempcounter + " out of max " + maximum + " items");

Check warning on line 225 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L221-L225

Added lines #L221 - L225 were not covered by tests
}
}

} else {
// enhance all found items
try {
this.itemService.findByIds(context, items).forEachRemaining(this::enhanceItem);
} catch (SQLException e) {
handler.logError(e.getMessage());

Check warning on line 234 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L233-L234

Added lines #L233 - L234 were not covered by tests
}
}
handler.logInfo("enhanced " + counter + " items");
log.info("enhanced " + counter + " items");
}

private SolrDocumentList searchItemsInSolr(String query, String datequeryupper, String datequerylower)
throws SolrServerException, IOException {
SolrQuery sQuery;
if (Objects.nonNull(query)) {
sQuery = new SolrQuery(query);
} else {
sQuery = new SolrQuery("*");
}
if (Objects.nonNull(datequeryupper) && Objects.nonNull(datequerylower)) {
sQuery.addFilterQuery("lastModified:[" + datequerylower + " TO " + datequeryupper + "]");

Check warning on line 250 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L250

Added line #L250 was not covered by tests
} else if (Objects.nonNull(datequeryupper)) {
sQuery.addFilterQuery("lastModified:[* TO " + datequeryupper + "]");

Check warning on line 252 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L252

Added line #L252 was not covered by tests
} else if (Objects.nonNull(datequerylower)) {
sQuery.addFilterQuery("lastModified:[" + datequerylower + " TO *]");

Check warning on line 254 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L254

Added line #L254 was not covered by tests
}
if (Objects.nonNull(entitytype)) {
sQuery.addFilterQuery("search.entitytype:" + entitytype);

Check warning on line 257 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L257

Added line #L257 was not covered by tests
}
sQuery.addFilterQuery(SearchUtils.RESOURCE_TYPE_FIELD + ":Item");
if (Objects.nonNull(collection)) {
sQuery.addFilterQuery("location.coll:" + UUIDUtils.toString(collection));
}
sQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
if (max > 0) {
sQuery.setRows(this.max);
} else {
sQuery.setRows(Integer.MAX_VALUE);
}
sQuery.setSort("lastModified_dt",SolrQuery.ORDER.asc);
handler.logInfo("Query Params:" + sQuery.toString());
QueryResponse qResp = solrSearchCore.getSolr().query(sQuery);
return qResp.getResults();
}

private void enhanceItem(Item item) {
counter++;
itemEnhancerService.enhance(context, item, force);
uncacheItem(item);
}

private void uncacheItem(Item item) {
try {
context.uncacheEntity(item);
} catch (SQLException e) {
throw new SQLRuntimeException(e);

Check warning on line 285 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L284-L285

Added lines #L284 - L285 were not covered by tests
}
}

private void assignCurrentUserInContext() throws SQLException {
UUID uuid = getEpersonIdentifier();
if (uuid != null) {
EPerson ePerson = EPersonServiceFactory.getInstance().getEPersonService().find(context, uuid);
context.setCurrentUser(ePerson);

Check warning on line 293 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L292-L293

Added lines #L292 - L293 were not covered by tests
}
}

private void assignSpecialGroupsInContext() {
for (UUID uuid : handler.getSpecialGroups()) {
context.setSpecialGroup(uuid);
}

Check warning on line 300 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScript.java#L299-L300

Added lines #L299 - L300 were not covered by tests
}

@Override
@SuppressWarnings("unchecked")
public ItemEnhancerByDateScriptConfiguration<ItemEnhancerByDateScript> getScriptConfiguration() {
return new DSpace().getServiceManager().getServiceByName("item-enhancer-date",
ItemEnhancerByDateScriptConfiguration.class);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.enhancer.script;

import java.sql.SQLException;

import org.apache.commons.cli.Options;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.core.Context;
import org.dspace.scripts.configuration.ScriptConfiguration;
import org.springframework.beans.factory.annotation.Autowired;

/**
* Script configuration of {@link ItemEnhancerByDateScript}.
*
* @author Luca Giamminonni (luca.giamminonni at 4science.it)
* @author Florian Gantner ([email protected])
*/
public class ItemEnhancerByDateScriptConfiguration<T extends ItemEnhancerByDateScript> extends ScriptConfiguration<T> {

@Autowired
private AuthorizeService authorizeService;

private Class<T> dspaceRunnableClass;

@Override
public boolean isAllowedToExecute(Context context) {
try {
return authorizeService.isAdmin(context);
} catch (SQLException e) {
throw new RuntimeException("SQLException occurred when checking if the current user is an admin", e);

Check warning on line 36 in dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScriptConfiguration.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/content/enhancer/script/ItemEnhancerByDateScriptConfiguration.java#L34-L36

Added lines #L34 - L36 were not covered by tests
}
}

@Override
public Options getOptions() {
if (options == null) {
Options options = new Options();

options.addOption("f", "force", false, "force the usage of the deep mode"
+ " (always compute the enhanced metadata to verify if the item need an update)");
options.addOption("c", "collection", true,
"uuid of the collection. If the collection does not exist the script aborts.");
options.addOption("e", "entity", true, "Entity type of the items");
options.addOption("d", "dateupper", true,
"iso date as upper range of date query for lastModified. e.g. 2022-10-27T12:12:17.369Z ");
options.addOption("s", "datelower", true, "iso date as lower range of date query for lastModified ");
options.addOption("m", "max", true, "--max results/rows from solr");
options.addOption("l", "limit", true, "commit after --limit entities processed");
options.addOption("q", "query", true,
"additional filterquery for the entities. this can f.e. be the exclusion of already enhanced metadata");

super.options = options;
}
return options;
}

@Override
public Class<T> getDspaceRunnableClass() {
return dspaceRunnableClass;
}

@Override
public void setDspaceRunnableClass(Class<T> dspaceRunnableClass) {
this.dspaceRunnableClass = dspaceRunnableClass;
}

}
Loading
Loading