An initial approach towards scraping

This commit is contained in:
xcodereleases 2019-09-13 07:42:35 -06:00
parent ff7245deb8
commit 1796916ece
3 changed files with 101 additions and 54 deletions

View File

@ -8,10 +8,84 @@
import Foundation import Foundation
import WebKit import WebKit
/// JavaScript helpers injected into a page after it loads.
///
/// - `xpath(x, node)` evaluates the XPath expression `x` relative to `node`
///   (or the top-level document when `node` is null) and returns an ordered
///   node iterator, or `null` when nothing matches.
/// - `x(str)` runs `xpath` against the top-level document first and, when that
///   yields nothing, against the document of each `<iframe>` in turn, returning
///   the first non-null result.
///
/// `document.evaluate` never returns `null` by itself, so `xpath` probes for a
/// first match and reports "no match" as `null`; without that the iframe
/// fallback in `x` could never run. Frames are looked up by tag name (not by a
/// CSS class called "iframe"), and frames whose document is not accessible
/// (`contentDocument === null`) are skipped.
let searchAllFrames = """
function xpath(x, node) {
    var n = (node === null || node === undefined) ? document : node;
    // A node must be evaluated by the document that owns it.
    var owner = n.ownerDocument || n;
    // 9 = FIRST_ORDERED_NODE_TYPE: cheap probe for "is there any match?"
    if (owner.evaluate(x, n, null, 9, null).singleNodeValue === null) {
        return null;
    }
    // 5 = ORDERED_NODE_ITERATOR_TYPE
    return owner.evaluate(x, n, null, 5, null);
}
function x(str) {
    var result = xpath(str, null);
    if (result === null) {
        var frames = document.getElementsByTagName("iframe");
        for (var i = 0; i < frames.length; i++) {
            var frameDocument = frames[i].contentDocument;
            if (frameDocument === null) { continue; }
            result = xpath(str, frameDocument);
            if (result !== null) { break; }
        }
    }
    return result;
}
"""
/// Drives a `WKWebView` through a sequence of `Step`s and hands each loaded
/// page back to the caller as a parsed `XMLDocument`.
class Scraper: NSObject, WKNavigationDelegate, WKUIDelegate {
    private let config = WKWebViewConfiguration()
    private let webView: WKWebView

    /// The step currently in progress, if any.
    private var _currentStep: Step?

    /// The most recently parsed page, or `nil` if the last parse failed or nothing has loaded yet.
    private var _currentPage: XMLDocument?

    /// Completion handlers for in-flight loads, keyed by the navigation that `WKWebView.load` returned.
    private var loadCompletions = [WKNavigation: (XMLDocument?) -> Void]()

    override init() {
        webView = WKWebView(frame: CGRect(x: 0, y: 0, width: 320, height: 2048), configuration: config)
        super.init()
        webView.navigationDelegate = self
        webView.uiDelegate = self
    }

    /// Starts scraping with the `LoadDownloadsPage` step. Does nothing if a step is already active.
    func run() {
        guard _currentStep == nil else { return }
        move(to: LoadDownloadsPage(context: self))
    }

    /// Loads `page` in the web view and calls `completion` once the load has finished or failed.
    ///
    /// - Parameters:
    ///   - page: The URL string to load.
    ///   - completion: Called with the parsed page, or with `nil` when `page` is not a
    ///     valid URL, the web view does not start a navigation, the navigation fails,
    ///     or the resulting HTML cannot be parsed.
    func load(page: String, completion: @escaping (XMLDocument?) -> Void) {
        guard let url = URL(string: page),
              let navigation = webView.load(URLRequest(url: url)) else {
            // Report the failure asynchronously so the caller always hears back
            // after `load` has returned.
            DispatchQueue.main.async { completion(nil) }
            return
        }
        loadCompletions[navigation] = completion
    }

    /// The URL the web view is currently showing.
    func currentURL() -> URL? { return webView.url }

    /// The most recently parsed page.
    func currentPage() -> XMLDocument? {
        return _currentPage
    }

    /// Ends the current step (if any), then makes `step` current and starts it.
    func move(to step: Step) {
        _currentStep?.end()
        _currentStep = step
        _currentStep?.start()
    }

    // MARK: - WKNavigationDelegate

    /// Injects the frame-searching helpers, snapshots the page's HTML, parses it and
    /// delivers the result to the completion handler registered for `navigation`.
    func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) {
        webView.evaluateJavaScript(searchAllFrames, completionHandler: nil)
        webView.evaluateJavaScript("document.documentElement.outerHTML") { (result, error) in
            var doc: XMLDocument?
            if let html = result as? String {
                doc = try? XMLDocument(data: Data(html.utf8), options: [.documentTidyHTML])
            }
            self._currentPage = doc
            self.complete(navigation, with: doc)
        }
    }

    /// A committed navigation failed: release its completion handler with `nil`.
    func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) {
        complete(navigation, with: nil)
    }

    /// A navigation failed before any content was committed: release its completion handler with `nil`.
    func webView(_ webView: WKWebView,
                 didFailProvisionalNavigation navigation: WKNavigation!,
                 withError error: Error) {
        complete(navigation, with: nil)
    }

    /// Removes and invokes the completion handler registered for `navigation`, if there is one.
    /// `navigation` arrives as an implicitly unwrapped optional, so it is checked for
    /// `nil` before being used as a dictionary key.
    private func complete(_ navigation: WKNavigation?, with document: XMLDocument?) {
        guard let navigation = navigation,
              let completion = loadCompletions.removeValue(forKey: navigation) else { return }
        completion(document)
    }
}

View File

@ -10,8 +10,28 @@ import WebKit
/// Base class for one stage of a scrape. Subclasses override `start()` and
/// `end()`; the base implementations do nothing.
class Step: NSObject, WKNavigationDelegate {
    /// The scraper this step runs in.
    /// NOTE(review): this is a strong reference, and `Scraper` also holds its
    /// current step strongly, so the two retain each other while the step is
    /// current. Confirm that is acceptable for the scraper's lifetime.
    let context: Scraper

    /// - Parameter context: The scraper that will run this step.
    init(context: Scraper) {
        self.context = context
    }

    /// Called by `Scraper.move(to:)` right after this step becomes current.
    func start() { }

    /// Called by `Scraper.move(to:)` just before another step replaces this one.
    func end() { }
}
/// Step that opens the Apple developer downloads page and reports where the
/// web view ended up.
class LoadDownloadsPage: Step {
    /// Loads the downloads page. When the load produces a page, prints "login"
    /// if the web view landed on the `idmsa.apple.com` host, then prints the
    /// final URL. Does nothing further if the load failed or no URL is available.
    override func start() {
        context.load(page: "https://developer.apple.com/download") { page in
            // Only the presence of a parsed page matters here; its contents are not inspected.
            guard page != nil else { return }
            guard let u = self.context.currentURL() else { return }
            if u.host == "idmsa.apple.com" {
                print("login")
            }
            print(u.absoluteString)
        }
    }
}

View File

@ -12,61 +12,14 @@ import AppKit
/// Application delegate: on launch it prints a greeting, animates a textual
/// progress bar and starts the scraper.
class XCCheckDelegate: NSObject, NSApplicationDelegate {
    /// The progress line most recently written by `printPercentage(_:)`,
    /// or `nil` when none has been written yet.
    var lastPrintedString: String?

    let scraper = Scraper()

    func applicationDidFinishLaunching(_ notification: Notification) {
        print("Hello world!")
        printPercentage(0)
        scraper.run()
    }

    /// Prints the progress bar for `v` percent, then schedules itself for
    /// `v + 1` after 0.125 seconds. Stops once `v` exceeds 100.
    ///
    /// - Parameter v: Percentage to display; clamped to `0...100` for drawing.
    func printPercentage(_ v: Int) {
        guard v <= 100 else { return }
        if lastPrintedString != nil {
            // The previous bar ended with a newline, so the cursor sits on the line
            // below it: move up one line (CSI 1 A) and erase that line (CSI 2 K)
            // so the new bar replaces the old one instead of scrolling.
            print("\u{001B}[1A\u{001B}[2K", terminator: "")
            lastPrintedString = nil
        }
        let val = min(max(v, 0), 100)
        let string = pieces(for: val)
        print(string)
        lastPrintedString = string
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.125, execute: { self.printPercentage(v + 1) })
    }

    /// Renders `percentage` as a bracketed bar followed by a right-aligned label,
    /// e.g. `[...] 42%`. One cell represents 8 percentage points; the last cell
    /// is drawn with a partial block for the leftover eighths.
    ///
    /// - Parameter percentage: Value to draw; negative values are drawn as 0 cells.
    /// - Returns: The bar and label as a single line of text.
    private func pieces(for percentage: Int) -> String {
        // NOTE(review): the glyph literals were empty in the reviewed text, which
        // made the bar permanently blank. They are restored here as the Unicode
        // block elements (full block, then 1/8 ... 7/8 left blocks), written as
        // escapes so they survive tooling that drops non-ASCII characters.
        // Confirm against the intended glyphs.
        let fullBlock = "\u{2588}"
        let partialBlocks = [" ", "\u{258F}", "\u{258E}", "\u{258D}", "\u{258C}", "\u{258B}", "\u{258A}", "\u{2589}"]

        let drawn = max(percentage, 0)
        // Integer arithmetic: `drawn / 8` whole cells, `drawn % 8` leftover eighths.
        var final = String(repeating: fullBlock, count: drawn / 8)
        final.append(partialBlocks[drawn % 8])

        // Pad the bar to 12 cells.
        // NOTE(review): from 96% upward the bar already holds 13 cells
        // (12 whole + 1 partial), so it is one cell wider than below 96% — confirm intended.
        let remainingCharacterCount = 12 - final.count
        if remainingCharacterCount > 0 {
            final.append(String(repeating: " ", count: remainingCharacterCount))
        }
        final = "[" + final + "] "

        // Right-align the number in three columns so "5%", "50%" and "100%" line up.
        // NOTE(review): the reviewed text had identical one-space prefixes for the
        // < 10 and < 100 cases; presumably whitespace was collapsed — confirm.
        let number = String(percentage)
        final += String(repeating: " ", count: max(0, 3 - number.count)) + number + "%"
        return final
    }
}
// Echo the arguments the process was launched with.
print(CommandLine.arguments)