Skip to content

Commit

Permalink
feat:2117 rewriting the api endpoint returning a dataset as xml
Browse files Browse the repository at this point in the history
  • Loading branch information
alli83 committed Dec 4, 2024
1 parent 439b56c commit e3fea0a
Show file tree
Hide file tree
Showing 11 changed files with 378 additions and 610 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## Unreleased

- Feat #2117: Rewriting the API endpoint returning a dataset as XML
- Fix #1975: Remove empty line at the beginning of the xml result
- Fix #1727: Sort files and samples by id in descending order when querying
- Feat #2066: Update file attribute values form layout and add expand button for long values
Expand Down
289 changes: 289 additions & 0 deletions gigadb/app/services/DatasetToXmlService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
<?php

declare(strict_types=1);

namespace GigaDB\services;

use yii\base\Component;

/**
 * Serialises a dataset and its related records into a `gigadb_entry` XML document.
 *
 * NOTE(review): apart from the dataset description, values are handed to
 * SimpleXMLElement::addChild() unescaped, and addChild() does not escape "&".
 * Confirm stored values (URLs in particular) never contain a bare ampersand.
 */
class DatasetToXmlService
{
    /**
     * Builds the XML document for a dataset.
     *
     * The flags select which sections are emitted: `$isAll` emits dataset, samples,
     * an empty `experiments` element and files; `$isDatasetOnly` emits the dataset
     * section and nothing else; `$isOnlySample` / `$isOnlyFile` emit just that section.
     *
     * @param \Dataset    $model         dataset to serialise
     * @param \Image|null $image         dataset image, used only for the dataset section
     * @param bool        $isAll         emit every section
     * @param bool        $isDatasetOnly emit only the dataset section
     * @param bool        $isOnlySample  emit the samples section
     * @param bool        $isOnlyFile    emit the files section
     *
     * @return string the serialised XML, or the literal 'XML is invalid' when serialisation fails
     */
    public function convertToXml(
        \Dataset $model,
        ?\Image $image = null,
        bool $isAll = false,
        bool $isDatasetOnly = false,
        bool $isOnlySample = false,
        bool $isOnlyFile = false
    ): string {
        $xml = new \SimpleXMLElement("<?xml version=\"1.0\" encoding=\"UTF-8\"?><gigadb_entry></gigadb_entry>");

        if ($isAll || $isDatasetOnly) {
            $xml = $this->buildDataset($xml, $model, $image);
        }

        // "Dataset only" wins over every other flag: no further section is appended.
        if (!$isDatasetOnly) {
            if ($isAll || $isOnlySample) {
                $xml = $this->buildSamples($xml, $model);
            }

            if ($isAll) {
                $xml->addChild('experiments');
            }

            if ($isAll || $isOnlyFile) {
                $xml = $this->buildFiles($xml, $model);
            }
        }

        // Serialise once instead of calling asXML() for the check and again for the result.
        $output = $xml->asXML();

        return is_string($output) && $output !== '' ? $output : 'XML is invalid';
    }

    /**
     * Appends the `samples` section (one `sample` element per dataset sample).
     *
     * @param \SimpleXMLElement $xml   document root to append to
     * @param \Dataset          $model dataset whose samples are serialised
     *
     * @return \SimpleXMLElement the same document root
     */
    private function buildSamples(\SimpleXMLElement $xml, \Dataset $model): \SimpleXMLElement
    {
        $samplesElement = $xml->addChild('samples');

        foreach ($model->samples as $sample) {
            $sampleElement = $samplesElement->addChild('sample');
            // addAttribute() requires a string under strict_types, hence the casts.
            $sampleElement->addAttribute('submission_date', (string) $sample->submission_date);
            $sampleElement->addAttribute('id', (string) $sample->id);

            $sampleElement->addChild('name', $sample->name);

            $species = $sample->species;
            $speciesElement = $sampleElement->addChild('species');
            $speciesElement->addChild('tax_id', (string) $species->tax_id);
            $speciesElement->addChild('common_name', $species->common_name);
            $speciesElement->addChild('genbank_name', $species->genbank_name);
            $speciesElement->addChild('scientific_name', $species->scientific_name);
            $speciesElement->addChild('eol_link', $species->eol_link);

            $sampleElement->addChild('sampling_protocol', $sample->sampling_protocol);
            $sampleElement->addChild('consent_doc', $sample->consent_document);

            $contactAuthor = $sampleElement->addChild('contact_author');
            $contactAuthor->addChild('name', $sample->contact_author_name);
            $contactAuthor->addChild('email', $sample->contact_author_email);

            $relatedSamplesElement = $sampleElement->addChild('related_samples');
            foreach ($sample->sampleRels as $sampleRel) {
                // NOTE(review): the element text is the owning sample's name, not the
                // related sample's — kept as-is, confirm this is intended.
                $relatedSample = $relatedSamplesElement->addChild('related_sample', $sample->name);
                $relatedSample->addAttribute('relationship_type', (string) $sampleRel->relationship->name);
            }

            $sampleAttributesElement = $sampleElement->addChild('sample_attributes');
            foreach ($sample->sampleAttributes as $sampleAttribute) {
                // Resolve the unit first: it was previously read before being assigned,
                // so each attribute reported the unit of the preceding one.
                $unitModel = $sampleAttribute->unit;
                $unitName = $unitModel ? $unitModel->name : null;

                $attributeElement = $sampleAttributesElement->addChild('attribute');
                $attributeElement->addChild('key', $sampleAttribute->attribute->attribute_name);
                $attributeElement->addChild('value', $sampleAttribute->value);
                $unitElement = $attributeElement->addChild('unit', $unitName ?: null);
                $unitElement->addAttribute('id', (string) $sampleAttribute->unit_id);
            }
        }

        return $xml;
    }

    /**
     * Appends the `files` section (one `file` element per dataset file).
     *
     * @param \SimpleXMLElement $xml   document root to append to
     * @param \Dataset          $model dataset whose files are serialised
     *
     * @return \SimpleXMLElement the same document root
     */
    private function buildFiles(\SimpleXMLElement $xml, \Dataset $model): \SimpleXMLElement
    {
        $files = $model->files;
        $filesElement = $xml->addChild('files');

        foreach ($files as $file) {
            $fileElement = $filesElement->addChild('file');
            $fileElement->addAttribute('id', (string) $file->id);
            $fileElement->addAttribute('index4blast', (string) $file->index4blast);
            $fileElement->addAttribute('download_count', (string) $file->download_count);
            $fileElement->addChild('name', $file->name);
            $fileElement->addChild('location', $file->location);
            // Angle brackets are dropped from the description; the cast keeps
            // preg_replace() from rejecting a null description under strict_types.
            $fileElement->addChild('description', preg_replace('/[<>]/', '', (string) $file->description));
            $fileElement->addChild('extension', $file->extension);
            $sizeElement = $fileElement->addChild('size', (string) $file->size);
            $sizeElement->addAttribute('units', 'bytes');
            $fileElement->addChild('release_date', $file->date_stamp);

            $fileType = $file->type;
            $typeElement = $fileElement->addChild('type', $fileType->name);
            $typeElement->addAttribute('id', (string) $fileType->id);
            $formatElement = $fileElement->addChild('format', $file->format->name);
            $formatElement->addAttribute('id', (string) $file->format_id);

            $linkedSamplesElement = $fileElement->addChild('linked_samples');
            foreach ($file->fileSamples as $fileSample) {
                $linkedSampleModel = $fileSample->sample;

                // A link row without a resolvable sample is skipped.
                if (!$linkedSampleModel) {
                    continue;
                }

                $linkedSample = $linkedSamplesElement->addChild('linked_sample', $linkedSampleModel->name);
                $linkedSample->addAttribute('sample_id', (string) $fileSample->sample_id);
            }

            $fileAttributesElement = $fileElement->addChild('file_attributes');
            foreach ($file->fileAttributes as $fileAttribute) {
                $attributeModel = $fileAttribute->attribute;
                $unitModel = $fileAttribute->unit;

                $attributeElement = $fileAttributesElement->addChild('attribute');
                $attributeElement->addChild('key', $attributeModel->attribute_name);
                $attributeElement->addChild('value', $fileAttribute->value);
                $unitElement = $attributeElement->addChild('unit', $unitModel ? $unitModel->name : null);
                // Cast: addAttribute() only accepts strings under strict_types.
                $unitElement->addAttribute('id', $unitModel ? (string) $unitModel->id : '');
            }

            $fileElement->addChild('related_file');
        }

        return $xml;
    }

    /**
     * Appends the `dataset` section: identifiers, submitter, title, description, authors,
     * data types, image, size, publication details, links and dataset attributes.
     *
     * @param \SimpleXMLElement $xml   document root to append to
     * @param \Dataset          $model dataset to serialise
     * @param \Image|null       $image dataset image; when null the image fields are emitted empty
     *
     * @return \SimpleXMLElement the same document root
     */
    private function buildDataset(\SimpleXMLElement $xml, \Dataset $model, ?\Image $image = null): \SimpleXMLElement
    {
        $datasetElement = $xml->addChild('dataset');
        $datasetElement->addAttribute('id', (string) $model->id);
        $datasetElement->addAttribute('doi', (string) $model->identifier);

        // The submitter fields were previously read from undefined local variables.
        // NOTE(review): assumes the dataset exposes a `submitter` relation carrying
        // these attributes — confirm against the Dataset/User models.
        $submitter = $model->submitter;
        $submitterElement = $datasetElement->addChild('submitter');
        foreach (['first_name', 'last_name', 'affiliation', 'username', 'email'] as $field) {
            $submitterElement->addChild($field, $submitter ? $submitter->$field : null);
        }

        $datasetElement->addChild('title', strip_tags((string) $model->title));

        // Escape into a local variable: writing the escaped text back onto the model
        // altered the record and double-escaped it on a second call.
        $description = htmlspecialchars(
            str_replace('<br>', '<br />', (string) $model->description),
            ENT_XML1,
            'UTF-8'
        );
        $datasetElement->addChild('description', $description);

        $authorsElement = $datasetElement->addChild('authors');
        $authors = $model->authors;
        usort($authors, static function ($a, $b): int {
            return $a['id'] <=> $b['id'];
        });
        foreach ($authors as $author) {
            $authorElement = $authorsElement->addChild('author');
            $authorElement->addChild('firstname', $author->first_name);
            $authorElement->addChild('middlename', $author->middle_name);
            $authorElement->addChild('surname', $author->surname);
            $authorElement->addChild('orcid', $author->orcid);
        }

        $dataTypesElement = $datasetElement->addChild('data_types');
        foreach ($model->datasetTypes as $datasetType) {
            $typeElement = $dataTypesElement->addChild('dataset_type');
            $typeElement->addChild('type_name', $datasetType->name);
            $typeElement->addChild('type_id', (string) $datasetType->id);
        }

        // $image is nullable: emit the same child elements, empty, when there is none.
        $imageElement = $datasetElement->addChild('image');
        $imageFields = [
            'image_filename' => 'location',
            'tag' => 'tag',
            'license' => 'license',
            'source' => 'source',
            'credit' => 'photographer',
        ];
        foreach ($imageFields as $elementName => $property) {
            $imageElement->addChild($elementName, $image ? $image->$property : null);
        }

        $sizeElement = $datasetElement->addChild('dataset_size', (string) $model->dataset_size);
        $sizeElement->addAttribute('units', 'bytes');
        $datasetElement->addChild('ftp_site', $model->ftp_site);

        $publication = $datasetElement->addChild('publication');
        $publication->addAttribute('date', (string) $model->publication_date);
        $publisher = $publication->addChild('publisher');
        $publisher->addAttribute('name', 'GigaScience database');
        $publication->addChild('modification_date', $model->modification_date);
        $fairUse = $publication->addChild('fair_use');
        // Read from the dataset: the service itself has no `fairnuse` property.
        $fairUse->addAttribute('date', (string) ($model->fairnuse ?: ''));

        $links = $datasetElement->addChild('links');

        $externalLinksElement = $links->addChild('external_links');
        foreach ($model->externalLinks as $externalLink) {
            $externalLinkElement = $externalLinksElement->addChild('external_link', $externalLink->url);
            $externalLinkElement->addAttribute('type', (string) $externalLink->externalLinkType->name);
        }

        $projectLinksElement = $links->addChild('project_links');
        foreach ($model->projects as $project) {
            $projectLink = $projectLinksElement->addChild('project_link');
            $projectLink->addChild('project_name', $project->name);
            $projectLink->addChild('project_url', $project->url);
        }

        $internalLinksElement = $links->addChild('internal_links');
        foreach ($model->relations as $relation) {
            // NOTE(review): the element is emitted without text (no related DOI value) —
            // kept as-is, confirm whether the related DOI should be the element content.
            $internalLink = $internalLinksElement->addChild('related_DOI');
            $internalLink->addAttribute('relationship', (string) $relation->relationship->name);
        }

        $manuscriptLinksElement = $links->addChild('manuscript_links');
        foreach ($model->manuscripts as $manuscript) {
            // Attach to manuscript_links; these were previously appended to internal_links.
            $manuscriptLink = $manuscriptLinksElement->addChild('manuscript_link');
            $manuscriptLink->addChild('manuscript_DOI', $manuscript->identifier);
            $manuscriptLink->addChild('manuscript_pmid', (string) $manuscript->pmid);
        }

        $alternativeIdentifiersElement = $links->addChild('alternative_identifiers');
        foreach ($model->links as $link) {
            // Links are stored as "prefix:identifier"; tolerate a link without a colon.
            $parts = explode(':', (string) $link->link);
            $prefix = $parts[0];
            $identifier = $parts[1] ?? '';

            $prefixModel = \Prefix::model()->find('lower(prefix) = :p', array(':p' => strtolower($prefix)));
            $value = $prefixModel ? sprintf('%s%s', $prefixModel->url, $identifier) : $identifier;

            $alternativeIdentifier = $alternativeIdentifiersElement->addChild('alternative_identifier', $value);
            $alternativeIdentifier->addAttribute('is_primary', (string) $link->is_primary);
            $alternativeIdentifier->addAttribute('prefix', $prefix);
        }

        $fundingLinksElement = $links->addChild('funding_links');
        foreach ($model->datasetFunders as $datasetFunder) {
            $funder = $datasetFunder->funder;
            $grant = $fundingLinksElement->addChild('grant');
            $grant->addChild('funder_name', $funder->primary_name_display);
            $grant->addChild('fundref_url', $funder->uri);
            $grant->addChild('award', $datasetFunder->grant_award);
            $grant->addChild('comment', $datasetFunder->comments);
        }

        $datasetAttributesElement = $datasetElement->addChild('ds_attributes');
        foreach ($model->datasetAttributes as $datasetAttribute) {
            // Attributes without a value are not exported.
            if (!$datasetAttribute->value) {
                continue;
            }

            $attributeModel = $datasetAttribute->attribute;
            $unitModel = $datasetAttribute->units;

            // key/value/unit belong inside the new <attribute> node; they were previously
            // added to the <ds_attributes> container, leaving every <attribute> empty.
            $attributeElement = $datasetAttributesElement->addChild('attribute');
            $attributeElement->addChild('key', $attributeModel ? $attributeModel->attribute_name : null);
            $attributeElement->addChild('value', $datasetAttribute->value);
            $unitElement = $attributeElement->addChild('unit');
            $unitElement->addAttribute('id', $unitModel ? (string) $unitModel->id : '');
        }

        return $xml;
    }
}
3 changes: 3 additions & 0 deletions ops/configuration/yii-conf/web.dev.CI.php.dist
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ return [
]
],
'components' => [
'datasetToXml' => [
'class' => 'GigaDB\services\DatasetToXmlService'
],
'mailer' => [
'class' => 'yii\swiftmailer\Mailer',
'useFileTransport' => true,
Expand Down
12 changes: 8 additions & 4 deletions protected/controllers/ApiController.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,22 @@ public function actionDataset()
$image = $model->image;
ob_get_clean();


/** @var \GigaDB\services\DatasetToXmlService $datasetXml */
$datasetXml = \Yii::$app->datasetToXml;

switch ($result) {
case "dataset":
$this->renderPartial('singledatasetonly',array('model'=> $model, 'image' => $image));
$this->renderPartial('datasetAsXml', array('xml'=> $datasetXml->convertToXml($model, $image, false, true)));
break;
case "sample":
$this->renderPartial('singlesample',array('model'=> $model));
$this->renderPartial('datasetAsXml', array('xml'=> $datasetXml->convertToXml($model, null, false, false, true, false)));
break;
case "file":
$this->renderPartial('singlefile',array('model'=> $model));
$this->renderPartial('datasetAsXml', array('xml'=> $datasetXml->convertToXml($model, null, false, false, false, true)));
break;
case "all":
$this->renderPartial('singledataset',array('model'=> $model, 'image' => $image));
$this->renderPartial('datasetAsXml', array('xml'=> $datasetXml->convertToXml($model, $image, true)));
break;
default:
$this->_sendResponse(500, 'A problem occurred');
Expand Down
15 changes: 7 additions & 8 deletions protected/tests/functional/ApiTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class ApiTest extends FunctionalTesting
use BrowserPageSteps;

public function testItShouldOutputDatasetOnly() {
$url = "http://gigadb.dev/api/dataset/doi/100006?result=dataset" ;
$url = "http://gigadb.dev/api/dataset?doi=100006&result=dataset" ;

// Go to a page and getting xml content
$feed = $this->getXMLWithSessionAndUrl($url);
Expand All @@ -27,20 +27,20 @@ public function testItShouldOutputDatasetOnly() {
}

public function testItShouldOutputSamplesOnly() {
$url = "http://gigadb.dev/api/dataset/doi/100006?result=sample" ;
$url = "http://gigadb.dev/api/dataset?doi=100006&result=sample" ;

// Go to a page and getting xml content
$feed = $this->getXMLWithSessionAndUrl($url);

// Validate text presence on a page.
$this->assertEquals("Pygoscelis_adeliae", $feed->samples->sample[0]->name);
$this->assertEquals("Pygoscelis_adeliae", (string) $feed->samples->sample[0]->name);
$this->assertEquals("9238", $feed->samples->sample[0]->species->tax_id);
$this->assertNull($feed->files->file);
$this->assertNull($feed->dataset->title);
}

public function testItShouldOutputFilesOnly() {
$url = "http://gigadb.dev/api/dataset/doi/100006?result=file" ;
$url = "http://gigadb.dev/api/dataset?doi=100006&result=file" ;

// Go to a page and getting xml content
$feed = $this->getXMLWithSessionAndUrl($url);
Expand All @@ -52,19 +52,18 @@ public function testItShouldOutputFilesOnly() {
}

public function testItShouldOutputFullDataset() {
$url = "http://gigadb.dev/api/dataset/doi/100006?result=all" ;
$url = "http://gigadb.dev/api/dataset?doi=100006&result=all" ;

// Go to a page and getting xml content
$feed = $this->getXMLWithSessionAndUrl($url);

// Validate text presence on a page.
$this->assertEquals("Genomic data from Adelie penguin (Pygoscelis adeliae). ", $feed->dataset->title);
$this->assertEquals("Genomic data from Adelie penguin (Pygoscelis adeliae). ", (string) $feed->dataset->title);
$this->assertEquals("9238", $feed->samples->sample[0]->species->tax_id);
$this->assertEquals("Pygoscelis_adeliae.scaf.fa.gz", $feed->files->file[5]->name);
}

public function testItShouldOutputFullDatasetByDefault() {
$url = "http://gigadb.dev/api/dataset/doi/100006" ;
$url = "http://gigadb.dev/api/dataset?doi=100006" ;

// Go to a page and getting xml content
$feed = $this->getXMLWithSessionAndUrl($url);
Expand Down
4 changes: 4 additions & 0 deletions protected/views/api/datasetAsXml.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?php
/**
 * Raw XML view: declares the XML content type and writes the document out unchanged.
 *
 * @var string $xml already-serialised XML document supplied by the rendering controller
 */
header('Content-Type: text/xml');

print $xml;
Loading

0 comments on commit e3fea0a

Please sign in to comment.