feat: Try to clean head

fix: Make classes public

feat: Clean head

feat: Make func public

fix: Make class static (bis)

refactor: Move visitor into HeadCleaner
This commit is contained in:
Valentin Perignon 2023-04-25 11:50:28 +02:00
parent f707b8680c
commit 1a33987f1c
1 changed files with 86 additions and 0 deletions

86
Sources/HeadCleaner.swift Normal file
View File

@ -0,0 +1,86 @@
//
// HeadCleaner.swift
// SwiftSoup
//
// Created by Valentin Perignon on 25/04/2023.
//
import Foundation
/// Namespace for copying the allowed parts of one document's `<head>` into another document.
public enum HeadCleaner {
    /// Copies the safe nodes found in the head of `dirtyDocument` into the head of `cleanedDocument`.
    ///
    /// Does nothing when either document has no head element.
    ///
    /// - Parameters:
    ///   - dirtyDocument: Document whose head is read.
    ///   - cleanedDocument: Document whose head receives the copied nodes.
    /// - Throws: Any error raised while traversing the source head or appending to the destination.
    public static func clean(dirtyDocument: Document, cleanedDocument: Document) throws {
        guard let sourceHead = dirtyDocument.head() else { return }
        guard let targetHead = cleanedDocument.head() else { return }
        try copySafeNodes(source: sourceHead, destination: targetHead)
    }

    /// Traverses `source` with a `CleaningVisitor` that writes its output below `destination`.
    private static func copySafeNodes(source: Element, destination: Element) throws {
        let traversor = NodeTraversor(CleaningVisitor(root: source, destination: destination))
        try traversor.traverse(source)
    }
}
extension HeadCleaner {
    /// Visitor that rebuilds, below `destination`, a copy of the allow-listed nodes found below `root`.
    ///
    /// `destination` acts as a cursor: `head(_:_:)` moves it down into every element it copies and
    /// `tail(_:_:)` moves it back up when that same element is left. Any element that is not copied
    /// (other than `root` itself) is remembered in `elementToSkip`, and everything visited until
    /// that element's `tail` call is ignored.
    private final class CleaningVisitor: NodeVisitor {
        /// Lowercased tag names that may be copied into the destination.
        private static let allowedTags = ["style", "meta", "base"]

        /// Element the traversal starts from; it is never copied and never marked as skipped.
        private let root: Element
        /// Element that currently receives copied children.
        private var destination: Element
        /// Root of the subtree currently being ignored, or `nil` when nodes are being copied.
        private var elementToSkip: Element?

        /// - Parameters:
        ///   - root: Element whose descendants are inspected.
        ///   - destination: Element that receives the copied nodes.
        init(root: Element, destination: Element) {
            self.root = root
            self.destination = destination
        }

        /// Called when a node is entered: copies it if allowed, otherwise starts skipping its subtree.
        ///
        /// - Throws: Any error raised by `appendChild`.
        public func head(_ node: SwiftSoup.Node, _ depth: Int) throws {
            // Inside a skipped subtree nothing is copied, whatever its tag.
            guard elementToSkip == nil else { return }

            if let elementNode = node as? Element {
                if isSafeTag(node: elementNode) {
                    let sourceTag = elementNode.nodeName()
                    guard let destinationAttributes = elementNode.attributes?.clone() else {
                        // The element cannot be reproduced: skip its whole subtree so that its
                        // children are not attached to the wrong parent and so that `tail`
                        // does not move the cursor up for an element that was never entered.
                        elementToSkip = elementNode
                        return
                    }
                    let destinationChild = Element(Tag(sourceTag), elementNode.baseUri ?? "", destinationAttributes)
                    try destination.appendChild(destinationChild)
                    // Descend: children of this element are appended to its copy.
                    destination = destinationChild
                } else if node != root {
                    elementToSkip = elementNode
                }
            } else if let textNode = node as? TextNode {
                let destinationText = TextNode(textNode.getWholeText(), textNode.getBaseUri())
                try destination.appendChild(destinationText)
            } else if let dataNode = node as? DataNode, let parent = node.parent(), isSafeTag(node: parent) {
                // Raw data is only kept when its direct parent is itself an allowed element.
                let destinationData = DataNode(dataNode.getWholeData(), dataNode.getBaseUri())
                try destination.appendChild(destinationData)
            }
        }

        /// Called when a node is left: ends a skip, or moves the cursor back up after a copied element.
        ///
        /// - Throws: `Exception.Error` with "Illegal state" if the cursor has no parent to return to.
        public func tail(_ node: SwiftSoup.Node, _ depth: Int) throws {
            if node == elementToSkip {
                elementToSkip = nil
                return
            }
            // Elements nested in a skipped subtree were never copied by `head`, so the cursor
            // must not move for them even when their tag is allow-listed.
            guard elementToSkip == nil else { return }

            guard let elementNode = node as? Element, isSafeTag(node: elementNode) else { return }
            guard let parent = destination.parent() else {
                throw Exception.Error(type: .IllegalArgumentException, Message: "Illegal state")
            }
            destination = parent
        }

        /// Returns `true` when the node's tag is allow-listed and the node is not a meta refresh.
        private func isSafeTag(node: Node) -> Bool {
            guard !isMetaRefresh(node: node) else { return false }
            let tag = node.nodeName().lowercased()
            return Self.allowedTags.contains(tag)
        }

        /// Returns `true` for a `meta` node whose `http-equiv` attribute, trimmed and lowercased, is `refresh`.
        private func isMetaRefresh(node: Node) -> Bool {
            let tag = node.nodeName().lowercased()
            guard tag == "meta" else { return false }
            // A missing attribute set or a failed lookup yields nil, which never equals "refresh".
            let attributeValue = try? node.attributes?.getIgnoreCase(key: "http-equiv").trim().lowercased()
            return attributeValue == "refresh"
        }
    }
}