FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects

Feature/am 427 sidebar only basic html

Merged A. Michaels requested to merge feature/am-427-sidebar-only-basic-html into main
3 files
+ 218
0
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 206
0
<?php
namespace Drupal\cambridge_migrations\Drush\Commands;
use Drush\Commands\DrushCommands;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\paragraphs\Entity\Paragraph;
use Symfony\Component\DependencyInjection\ContainerInterface;
use DOMDocument;
use DOMElement;
/**
 * Drush command for cleaning up paragraph HTML content.
 *
 * Loads every paragraph of bundle "text", strips "class" and "style"
 * attributes, drops <script>/<style> elements and elements that carry no
 * meaningful content, and saves the paragraph when its markup changed.
 */
class MigrateCleanupCommand extends DrushCommands {

  /**
   * Elements that are meaningful without any text content.
   *
   * These are kept as-is (apart from losing their own class/style
   * attributes) and their subtree is not inspected, so embedded media is
   * never mistaken for an "empty tag".
   *
   * NOTE(review): "drupal-media" / "drupal-entity" are included on the
   * assumption that field_text may hold Drupal embed tags - confirm.
   */
  protected const SELF_CONTAINED_TAGS = [
    'img',
    'iframe',
    'video',
    'audio',
    'embed',
    'object',
    'picture',
    'svg',
    'canvas',
    'hr',
    'input',
    'drupal-media',
    'drupal-entity',
  ];

  /**
   * Elements that are never removed, but do not count as content either.
   *
   * A <br> inside a paragraph with text must survive, yet "<p><br></p>" is
   * still an empty paragraph. Empty table cells must survive so that the
   * remaining cells keep their column position.
   */
  protected const KEEP_WHEN_EMPTY_TAGS = [
    'br',
    'wbr',
    'td',
    'th',
    'col',
  ];

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * Constructs a new MigrateCleanupCommand object.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   The entity type manager, used to obtain the paragraph storage.
   */
  public function __construct(EntityTypeManagerInterface $entity_type_manager) {
    parent::__construct();
    $this->entityTypeManager = $entity_type_manager;
  }

  /**
   * Clean HTML content by removing unwanted attributes and empty tags.
   *
   * The input is treated as an HTML fragment that may contain any number of
   * top-level nodes (elements, text, comments).
   *
   * @param string $html
   *   The HTML content to clean.
   *
   * @return string
   *   The cleaned HTML content. Empty input, or input that cannot be parsed,
   *   is returned unchanged.
   */
  protected function cleanHtml($html) {
    if (empty($html)) {
      return $html;
    }

    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = TRUE;
    $doc->formatOutput = TRUE;

    // Collect libxml parse errors internally instead of emitting PHP
    // warnings for malformed markup or unknown (HTML5/custom) tags. The
    // previous setting is restored so other code is not affected.
    $previous_setting = libxml_use_internal_errors(TRUE);
    try {
      // The XML processing instruction makes libxml read the input as UTF-8.
      // Wrapping the fragment in an explicit <html><body> gives it a single,
      // predictable parent, so fragments with several top-level nodes are
      // not re-nested under the first element.
      $loaded = $doc->loadHTML(
        '<?xml encoding="UTF-8"><html><body>' . $html . '</body></html>',
        LIBXML_HTML_NODEFDTD
      );
    }
    finally {
      libxml_clear_errors();
      libxml_use_internal_errors($previous_setting);
    }

    $body = $loaded ? $doc->getElementsByTagName('body')->item(0) : NULL;
    if (!$body instanceof DOMElement) {
      // Nothing usable was parsed; leave the stored value untouched.
      return $html;
    }

    // Strip unwanted attributes and remove empty elements in place. The
    // return value is irrelevant for the wrapper itself.
    $this->cleanNode($body);

    // Serialise the fragment, i.e. the children of the wrapper. Passing a
    // node to saveHTML() keeps UTF-8 characters as-is.
    $cleaned_html = '';
    foreach ($body->childNodes as $child) {
      $cleaned_html .= $doc->saveHTML($child);
    }

    return trim($cleaned_html);
  }

  /**
   * Recursively clean a DOM node by removing unwanted attributes and empty tags.
   *
   * The node is modified in place: its "class" and "style" attributes are
   * removed and child elements that report no meaningful content are
   * detached from it.
   *
   * @param \DOMElement $node
   *   The DOM node to clean.
   *
   * @return bool
   *   TRUE if the node has meaningful content and should be kept, FALSE if
   *   the caller should remove it.
   */
  protected function cleanNode(DOMElement $node) {
    $tag_name = strtolower($node->tagName);

    // Style and script elements are always dropped.
    if ($tag_name === 'style' || $tag_name === 'script') {
      return FALSE;
    }

    $node->removeAttribute('class');
    $node->removeAttribute('style');

    // Media/embed style elements are content by themselves.
    if (in_array($tag_name, self::SELF_CONTAINED_TAGS, TRUE)) {
      return TRUE;
    }

    $has_content = FALSE;

    // Iterate over a snapshot: children are removed while looping and the
    // live node list would otherwise skip siblings.
    foreach (iterator_to_array($node->childNodes) as $child) {
      if ($child instanceof DOMElement) {
        if ($this->cleanNode($child)) {
          $has_content = TRUE;
        }
        elseif (!in_array(strtolower($child->tagName), self::KEEP_WHEN_EMPTY_TAGS, TRUE)) {
          $node->removeChild($child);
        }
      }
      elseif ($child->nodeType === XML_TEXT_NODE) {
        // Treat UTF-8 non-breaking spaces like ordinary whitespace before
        // trimming, so any mix of spaces and nbsp counts as empty. Compare
        // against '' rather than using empty(), which would also discard
        // the literal text "0".
        $text = trim(str_replace("\xC2\xA0", ' ', (string) $child->nodeValue));
        if ($text !== '') {
          $has_content = TRUE;
        }
      }
    }

    return $has_content;
  }

  /**
   * Clean up HTML content in text paragraphs.
   *
   * Every paragraph of bundle "text" has the value of its "field_text" field
   * run through cleanHtml(); the paragraph is saved only when the value
   * changed. Failures on a single paragraph are logged and do not stop the
   * run.
   *
   * @command cambridge:migrate-cleanup
   * @aliases cm-cleanup
   */
  public function migrateCleanup() {
    try {
      $storage = $this->entityTypeManager->getStorage('paragraph');
      $pids = $storage->getQuery()
        ->condition('type', 'text')
        ->accessCheck(FALSE)
        ->execute();

      if (empty($pids)) {
        $this->logger()->warning(dt('No text paragraphs found to clean up.'));
        return;
      }

      $success_count = 0;
      $error_count = 0;

      foreach ($pids as $pid) {
        try {
          /** @var \Drupal\paragraphs\Entity\Paragraph|null $paragraph */
          $paragraph = $storage->load($pid);

          // load() returns NULL when the entity cannot be loaded; calling a
          // method on it would raise an \Error that the catch below does not
          // handle and would abort the whole run.
          if ($paragraph === NULL) {
            $this->logger()->warning(dt('Paragraph @pid could not be loaded.', [
              '@pid' => $pid,
            ]));
            continue;
          }

          if (!$paragraph->hasField('field_text')) {
            $this->logger()->warning(dt('Paragraph @pid does not have field_text field.', [
              '@pid' => $pid,
            ]));
            continue;
          }

          $text_field = $paragraph->get('field_text');
          $value = $text_field->value;
          $format = $text_field->format;

          $cleaned_value = $this->cleanHtml($value);

          // Only write (and count) paragraphs whose markup actually changed.
          if ($cleaned_value !== $value) {
            $text_field->setValue([
              'value' => $cleaned_value,
              'format' => $format,
            ]);
            $paragraph->save();
            $success_count++;
            $this->logger()->notice(dt('Successfully cleaned HTML in paragraph @pid', [
              '@pid' => $pid,
            ]));
          }
        }
        catch (\Exception $e) {
          $error_count++;
          $this->logger()->error(dt('Error cleaning paragraph @pid: @message', [
            '@pid' => $pid,
            '@message' => $e->getMessage(),
          ]));
        }
      }

      $this->logger()->notice(dt('HTML cleanup complete. Successes: @success, Errors: @errors', [
        '@success' => $success_count,
        '@errors' => $error_count,
      ]));
    }
    catch (\Exception $e) {
      $this->logger()->error(dt('HTML cleanup failed: @message', [
        '@message' => $e->getMessage(),
      ]));
    }
  }

  /**
   * Creates the command using services from the container.
   *
   * @param \Symfony\Component\DependencyInjection\ContainerInterface $container
   *   The service container; the "entity_type_manager" service is read
   *   from it.
   *
   * @return static
   *   A new command instance.
   */
  public static function create(ContainerInterface $container) {
    return new static(
      $container->get('entity_type_manager')
    );
  }

}
Loading