Skip to content

Commit 02aa196

Browse files
authored
Unset downloadURL if empty string (#4700)
1 parent fad1103 commit 02aa196

6 files changed

Lines changed: 290 additions & 3 deletions

File tree

.circleci/config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,13 +285,13 @@ workflows:
285285
- phpunit:
286286
matrix:
287287
parameters:
288-
drupal_core_constraint: ["~10.6.0", "~11.2.0", "11.3.0"]
288+
drupal_core_constraint: ["~10.6.0", "~11.2.0", "~11.3.0"]
289289
php_version: ["8.3"]
290290
database_version: ["mariadb:10.11"]
291291
- phpunit:
292292
matrix:
293293
parameters:
294-
drupal_core_constraint: ["10.6.0", "~11.2.0", "~11.3.0"]
294+
drupal_core_constraint: ["~10.6.0", "~11.2.0", "~11.3.0"]
295295
php_version: ["8.4"]
296296
database_version: ["mariadb:10.11"]
297297
- phpunit:

modules/dkan_metastore/config/schema/dkan_metastore.schema.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,6 @@ dkan_metastore.settings:
4242
retain_for:
4343
type: integer
4444
label: 'Number of days to keep referenced content before deletion'
45+
unset_download_url_if_empty:
46+
type: boolean
47+
label: 'Unset download URL if empty'

modules/dkan_metastore/src/Form/DkanDataSettingsForm.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ public function buildForm(array $form, FormStateInterface $form_state) {
8282

8383
$form['description'] = $this->getDescriptionMarkup();
8484
$form['redirect_to_datasets'] = $this->getRedirectCheckbox($config);
85+
$form['unset_download_url_if_empty'] = [
86+
'#type' => 'checkbox',
87+
'#title' => $this->t('Unset download URL if empty'),
88+
'#default_value' => $config->get('unset_download_url_if_empty') ?? 0,
89+
'#description' => $this->t('If enabled, and a dataset contains a distribution[].downloadURL property, the property will be unset if it is empty. For DCAT-US dataset schemas, this prevents a validation error in rare cases when a resource mapper entity is inadvertently deleted, leaving an empty downloadURL that fails validation. Leave this unchecked unless you are experiencing fatal validation errors on dataset load.'),
90+
];
8591
$form['html_allowed_properties'] = $this->getHtmlAllowedProperties($config);
8692
$form['html_allowed_html'] = $this->getHtmlAllowedHtml($config);
8793
$form['property_list'] = $this->getPropertyList($config);
@@ -223,6 +229,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) {
223229

224230
$this->config('dkan_metastore.settings')
225231
->set('redirect_to_datasets', $form_state->getValue('redirect_to_datasets'))
232+
->set('unset_download_url_if_empty', $form_state->getValue('unset_download_url_if_empty'))
226233
->set('property_list', $form_state->getValue('property_list'))
227234
->set('html_allowed_properties', $form_state->getValue('html_allowed_properties'))
228235
->set('html_allowed_html', $form_state->getValue('html_allowed_html'))

modules/dkan_metastore/src/LifeCycle/LifeCycle.php

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,14 @@ protected function distributionLoad(MetastoreItemInterface $data): void {
231231
if (is_string($downloadUrl)) {
232232
$downloadUrl = UrlHostTokenResolver::resolve($downloadUrl);
233233
}
234-
$metadata->data->downloadURL = $downloadUrl;
234+
235+
$unset_downloadUrl = $this->configFactory->get('dkan_metastore.settings')->get('unset_download_url_if_empty') ?? FALSE;
236+
if (!$downloadUrl && $unset_downloadUrl) {
237+
unset($metadata->data->downloadURL);
238+
}
239+
else {
240+
$metadata->data->downloadURL = $downloadUrl;
241+
}
235242

236243
// If describedBy contains dkan:// URI, convert to absolute URL.
237244
if (StreamWrapperManager::getScheme($metadata->data->describedBy ?? '') == MetastoreUrlGenerator::DKAN_SCHEME) {
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Drupal\Tests\dkan_metastore\LifeCycle;
6+
7+
use Drupal\dkan_common\DataResource;
8+
use Drupal\KernelTests\KernelTestBase;
9+
use RootedData\Exception\ValidationException;
10+
11+
/**
 * Kernel tests for the metastore LifeCycle load behavior.
 *
 * @group dkan
 * @group dkan_metastore
 * @group kernel
 *
 * @covers \Drupal\dkan_metastore\LifeCycle\LifeCycle
 * @coversDefaultClass \Drupal\dkan_metastore\LifeCycle\LifeCycle
 */
class LifeCycleTest extends KernelTestBase {

  /**
   * Dataset metadata with a single distribution that has a downloadURL.
   */
  protected const DATASET_DATA = [
    'title' => 'Test Dataset',
    'identifier' => '123',
    'description' => 'Test Description',
    'modified' => '2026-01-01',
    'accessLevel' => 'public',
    'keyword' => ['test'],
    'distribution' => [
      [
        'title' => 'Test Distribution 1',
        'downloadURL' => 'http://example.com/1.csv',
      ],
    ],
    'publisher' => [
      '@type' => 'org:Organization',
      'name' => 'Test Org',
    ],
    'theme' => [
      'Tag 1',
      'Tag 2',
    ],
  ];

  /**
   * {@inheritdoc}
   */
  protected static $modules = [
    'system',
    'node',
    'user',
    'field',
    'filter',
    'text',
    'dkan_metastore',
    'dkan_common',
    'dkan',
    'content_moderation',
    'workflows',
  ];

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();
    $this->installConfig('system');
    $this->installConfig('node');
    $this->installConfig('dkan_common');
    $this->installConfig('dkan_metastore');
    $this->installEntitySchema('node');
    $this->installSchema('node', ['node_access']);
    $this->installEntitySchema('content_moderation_state');
    $this->installConfig('field');
    $this->installEntitySchema('user');
    $this->installEntitySchema('resource_mapping');
  }

  /**
   * Tests loading a dataset whose distribution lost its resource mapping.
   *
   * With the unset_download_url_if_empty setting left at its installed value,
   * loading the dataset is expected to throw a ValidationException. Once the
   * setting is enabled, the dataset must load and the distribution must not
   * contain a downloadURL key.
   */
  public function testDistributionLoad(): void {
    /** @var \Drupal\dkan_metastore\MetastoreService $metastore */
    $metastore = $this->container->get('dkan.metastore.service');
    $node_storage = $this->container->get('entity_type.manager')->getStorage('node');

    $metadata = $metastore->getValidMetadataFactory()->get(json_encode(self::DATASET_DATA), 'dataset');
    $identifier = $metastore->post('dataset', $metadata);

    // Follow the stored (unresolved) references: dataset -> distribution ->
    // downloadURL resource identifier.
    $dataset_raw = $this->getRawMetadataByUuid($identifier);
    $distribution_raw = $this->getRawMetadataByUuid($dataset_raw['distribution'][0]);
    $resource_parts = DataResource::parseUniqueIdentifier($distribution_raw['data']['downloadURL']);

    // Delete the resource mapping entity the distribution points at.
    $mappings = $this->container->get('entity_type.manager')
      ->getStorage('resource_mapping')
      ->loadByProperties([
        'identifier' => $resource_parts['identifier'],
        'version' => $resource_parts['version'],
        'perspective' => $resource_parts['perspective'],
      ]);
    foreach ($mappings as $mapping) {
      $mapping->delete();
    }

    // Avoid reusing the already-loaded distribution entity with a resolved URL.
    $node_storage->resetCache();

    // Re-load the original dataset via the metastore service. expectException()
    // is not usable here because the test continues after the failure.
    try {
      $metastore->get('dataset', $identifier);
      $this->fail('Expected a ValidationException to be thrown due to the missing resource mapping.');
    }
    catch (ValidationException $e) {
      $this->assertSame('JSON Schema validation failed.', $e->getMessage());
    }

    // Enable the unset_download_url_if_empty setting and try again.
    $config_factory = $this->container->get('config.factory');
    $config_factory->getEditable('dkan_metastore.settings')
      ->set('unset_download_url_if_empty', TRUE)
      ->save();
    $config_factory->reset('dkan_metastore.settings');
    $node_storage->resetCache();

    $dataset = $metastore->get('dataset', $identifier);
    $this->assertArrayNotHasKey('downloadURL', $dataset->{"$.distribution[0]"});
  }

  /**
   * Reads the stored JSON metadata of a data node straight from the database.
   *
   * The field table is queried directly so the value is returned exactly as
   * stored, without going through the metastore load path under test.
   *
   * @param string $uuid
   *   UUID of the data node.
   *
   * @return array
   *   The decoded field_json_metadata value.
   */
  private function getRawMetadataByUuid(string $uuid): array {
    $nodes = $this->container->get('entity_type.manager')
      ->getStorage('node')
      ->loadByProperties(['type' => 'data', 'uuid' => $uuid]);
    $node = reset($nodes);
    $this->assertNotFalse($node, "No data node found for UUID $uuid.");

    $json_raw = $this->container->get('database')->query(
      'SELECT field_json_metadata_value FROM {node__field_json_metadata} WHERE entity_id = :entity_id',
      [':entity_id' => $node->id()]
    )->fetchField();
    $this->assertIsString($json_raw, "No stored metadata found for UUID $uuid.");

    $decoded = json_decode($json_raw, TRUE);
    $this->assertIsArray($decoded, "Stored metadata for UUID $uuid is not a JSON object.");
    return $decoded;
  }

}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Drupal\Tests\dkan_metastore\Kernel\Service;
6+
7+
use Drupal\KernelTests\KernelTestBase;
8+
use Drupal\Tests\dkan_common\Traits\QueueRunnerTrait;
9+
10+
/**
 * Kernel tests for deleting orphaned reference nodes.
 *
 * @group dkan
 * @group dkan_metastore
 * @group kernel
 *
 * @covers \Drupal\dkan_metastore\Service\OrphanNodeProcessor
 * @coversDefaultClass \Drupal\dkan_metastore\Service\OrphanNodeProcessor
 */
class OrphanNodeProcessorTest extends KernelTestBase {
  use QueueRunnerTrait;

  /**
   * Dataset metadata with keyword, distribution, publisher and theme values.
   */
  protected const DATASET_DATA = [
    'title' => 'Test Dataset',
    'identifier' => '123',
    'description' => 'Test Description',
    'modified' => '2026-01-01',
    'accessLevel' => 'public',
    'keyword' => ['test'],
    'distribution' => [
      [
        'title' => 'Test Distribution 1',
        'downloadURL' => 'http://example.com/1.csv',
      ],
    ],
    'publisher' => [
      '@type' => 'org:Organization',
      'name' => 'Test Org',
    ],
    'theme' => [
      'Tag 1',
      'Tag 2',
    ],
  ];

  /**
   * {@inheritdoc}
   */
  protected static $modules = [
    'system',
    'node',
    'user',
    'field',
    'filter',
    'text',
    'dkan_metastore',
    'dkan_common',
    'dkan',
    'content_moderation',
    'workflows',
  ];

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();
    $this->installConfig('system');
    $this->installConfig('node');
    $this->installConfig('dkan_common');
    $this->installConfig('dkan_metastore');
    $this->installEntitySchema('node');
    $this->installSchema('node', ['node_access']);
    $this->installEntitySchema('content_moderation_state');
    $this->installConfig('field');
    $this->installEntitySchema('user');
    $this->installEntitySchema('resource_mapping');

    // Turn on the orphan.delete setting for every test in this class.
    $config = $this->config('dkan_metastore.settings');
    $config->set('orphan.delete', TRUE);
    $config->save();
  }

  /**
   * Make sure that deleteOutdatedOrphans() removes all orphaned nodes.
   */
  public function testOrphanNodeDeletion(): void {
    /** @var \Drupal\dkan_metastore\MetastoreService $metastore */
    $metastore = $this->container->get('dkan.metastore.service');
    $related_count = $this->getRelatedItemCount();

    // Posting the dataset creates one node per related item plus the dataset.
    $metadata = $metastore->getValidMetadataFactory()->get(json_encode(self::DATASET_DATA), 'dataset');
    $metastore->post('dataset', $metadata);
    $this->assertSame($related_count + 1, $this->getDataNodeCount());

    // Deleting the dataset removes only the dataset node itself.
    $metastore->delete('dataset', self::DATASET_DATA['identifier']);
    $this->assertSame($related_count, $this->getDataNodeCount());

    $this->runQueues(['orphan_reference_processor']);
    /** @var \Drupal\dkan_metastore\Service\OrphanNodeProcessor $processor */
    $processor = $this->container->get('dkan.metastore.orphan_node_processor');
    $deleted_nids = $processor->deleteOutdatedOrphans();
    $this->assertCount($related_count, $deleted_nids);
    $this->assertSame(0, $this->getDataNodeCount());
  }

  /**
   * Counts the related items declared in the fixture dataset.
   *
   * @return int
   *   Number of keyword, distribution, publisher and theme items.
   */
  protected function getRelatedItemCount(): int {
    // There can only be one publisher and it's stored as an array.
    $publisher = count(self::DATASET_DATA['publisher']) ? 1 : 0;
    return count(self::DATASET_DATA['keyword'])
      + count(self::DATASET_DATA['distribution'])
      + $publisher
      + count(self::DATASET_DATA['theme']);
  }

  /**
   * Counts all nodes of type 'data', ignoring access checks.
   *
   * @return int
   *   Number of data nodes currently stored.
   */
  protected function getDataNodeCount(): int {
    // Cast so that strict (assertSame) comparisons against integers hold
    // regardless of how the count is returned by the query.
    return (int) $this->container->get('entity_type.manager')->getStorage('node')
      ->getQuery()
      ->accessCheck(FALSE)
      ->condition('type', 'data')
      ->count()
      ->execute();
  }

}

0 commit comments

Comments
 (0)