1 /* 2 Copyright (c) 2022 Andrea Fontana 3 Permission is hereby granted, free of charge, to any person 4 obtaining a copy of this software and associated documentation 5 files (the "Software"), to deal in the Software without 6 restriction, including without limitation the rights to use, 7 copy, modify, merge, publish, distribute, sublicense, and/or sell 8 copies of the Software, and to permit persons to whom the 9 Software is furnished to do so, subject to the following 10 conditions: 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 15 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 16 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 17 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 /** HTML5 parser and DOM manipulation library. 24 * 25 * Parserino is a wrapper around the lexbor library 26 */ 27 module parserino; 28 29 import parserino.c.lexbor; 30 import std.string : representation; 31 import std.conv : to; 32 import std.experimental.logger; 33 import core.atomic; 34 import std.algorithm : splitter, filter, map, canFind; 35 import core.thread : Fiber; 36 37 /// Order of visit 38 enum VisitOrder 39 { 40 Normal, 41 Reverse 42 } 43 44 /// The HTML5 Document 45 struct Document 46 { 47 /// C-tor 48 this(const string html) { parse(html); } 49 50 bool opEquals(const typeof(null) o) const @safe nothrow pure { return !isValid; } 51 bool opEquals(D = Document)(const auto ref D d) const 52 { 53 import std.traits : isSomeString; 54 55 static if(isSomeString!D) return isValid && this.toString == d; 56 else 57 { 58 size_t invalid = 0; 59 if (!d.isValid) invalid++; 60 if (!isValid) invalid++; 61 62 if (invalid == 1) return false; 63 else if (invalid == 0) return d.payload.document == this.payload.document; 64 else return true; 65 } 66 } 67 68 /// Is this a valid html5 document? 69 @property pure @safe nothrow bool isValid() const { return payload != null && payload.document != null; } 70 71 /// 72 unittest 73 { 74 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 75 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 76 77 Document doc = Document("<html><body>"); 78 Document doc2; 79 Document docCpy = doc; 80 81 assert(doc != null); 82 assert(doc.isValid); 83 assert(docCpy != null); 84 assert(docCpy.isValid); 85 86 assert(doc2 == null); 87 assert(!doc2.isValid); 88 89 assert(doc == docCpy); 90 assert(doc2 != doc); 91 assert(doc2 != docCpy); 92 93 assert(Document.RefCounter.refs.length == 1); 94 assert(Document.RefCounter.refs[doc.payload] == 2); 95 96 } 97 98 /// Recreate the document from source 99 private void parse(const string html) 100 { 101 if(payload != null) 102 RefCounter.remove(payload); 103 104 payload = new DocumentPayload(); 105 106 with(payload) 107 { 108 document = lxb_html_document_create(); 109 parser = lxb_css_parser_create(); 110 lxb_css_parser_init(parser, null, null); 111 } 112 113 Document.RefCounter.add(payload); 114 115 lxb_html_document_clean(payload.document); 116 CallWithLexborString!lxb_html_document_parse(payload.document, html); 117 } 118 119 /// Return document as html string 120 string toString() const 121 { 122 extern(C) lxb_status_t cb(const lxb_char_t *data, size_t len, void *ctx) 123 { 124 *(cast(string*)ctx) ~= cast(string)data[0..len]; 125 return lexbor_status_t.LXB_STATUS_OK; 126 } 127 128 string output; 129 130 lxb_html_serialize_tree_cb(cast(lxb_dom_node*)&(payload.document.dom_document.node), &cb, &output); 131 return output; 132 } 133 134 unittest 135 { 136 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 137 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 138 139 Document doc; 140 doc.parse("<html>"); 141 assert(doc == "<html><head></head><body></body></html>"); 142 } 143 144 /// The content of <title> 145 @property string title() { return titleImpl(false); } 146 @property string rawTitle() { return titleImpl(true); } 147 148 /// Set the content of <title> 149 @property void title(const string s) { CallWithLexborString!lxb_html_document_title_set(payload.document, s); } 150 151 unittest 152 { 153 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 154 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 155 156 Document doc = Document("<html><head><title>Hello World!>"); 157 assert(doc.title == "Hello World!>"); 158 159 doc.title = "Goodbye"; 160 assert(doc.title == "Goodbye"); 161 } 162 163 /// Create a html element 164 Element createElement(string tagName) { return Element(payload, lxb_dom_document_create_element(&(payload.document.dom_document), tagName.representation.ptr, tagName.representation.length, null)); } 165 166 /// Create a text element 167 Element createText(string text) { Element e = createElement("#text"); e.innerText = text; return e; } 168 169 /// Create a comment 170 Element createComment(string text) { Element e = createElement("!--"); e.innerText = text; return e; } 171 172 /// 173 unittest 174 { 175 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 176 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 177 178 179 Document d = `<p>`; 180 181 Element p = d.body.firstChild; 182 Element t = d.createText("this is a test"); 183 Element c = d.createComment("this is a comment"); 184 p.appendChild(t); 185 p.appendChild(c); 186 187 188 assert(p == "<p>this is a test<!--this is a comment--></p>"); 189 } 190 191 unittest 192 { 193 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 194 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 195 196 Document doc = Document("<html>"); 197 Element e = doc.createElement("title"); 198 199 assert(e.isValid); 200 201 auto html = doc; 202 assert(html == "<html><head></head><body></body></html>"); 203 } 204 205 /// The <body> element 206 @property Element body() { return Element(payload, cast(lxb_dom_element_t*)lxb_html_document_body_element_noi(payload.document)); } 207 208 /// The <head> element 209 @property Element head() { return Element(payload, cast(lxb_dom_element_t*)lxb_html_document_head_element_noi(payload.document)); } 210 211 unittest 212 { 213 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 214 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 215 216 Document doc = Document("<html><body>Text"); 217 218 assert(doc.head.isValid()); 219 assert(doc.body.isValid()); 220 assert(doc.body.innerText == "Text"); 221 } 222 223 /// Get an element by id 224 Element byId(string id) { auto range = Element(payload, payload.document.dom_document.element).byId(id); return range; } 225 226 /// A lazy range of elements filtered by class 227 auto byClass(string name) { auto range = Element(payload, payload.document.dom_document.element).byClass(name); return range; } 228 229 /// A lazy range of elements filtered by tag name 230 auto byTagName(string name) { auto range = Element(payload, payload.document.dom_document.element).byTagName(name); return range; } 231 232 /// A lazy range of elements filtered by comment text 233 auto byComment(string comment) { auto range = Element(payload, payload.document.dom_document.element).byComment(comment); return range; } 234 235 /// A lazy range of elements filtered using a css selector 236 auto bySelector(string selector) { auto range = Element(payload, payload.document.dom_document.element).bySelector(selector); return range; } 237 238 this(ref return scope typeof(this) rhs) 239 { 240 if (rhs.payload == null) return; 241 payload = rhs.payload; 242 243 RefCounter.add(payload); 244 } 245 246 auto opAssign(const string html) 247 { 248 if(payload != null) 249 RefCounter.remove(payload); 250 251 payload = null; 252 253 parse(html); 254 } 255 256 auto opAssign(typeof(null) n) 257 { 258 if(payload != null) 259 RefCounter.remove(payload); 260 261 payload = null; 262 } 263 264 auto opAssign(typeof(this) rhs) 265 { 266 auto oldPayload = payload; 267 payload = rhs.payload; 268 269 if (payload != null) 270 RefCounter.add(payload); 271 272 if (oldPayload != null) 273 RefCounter.remove(oldPayload); 274 275 return this; 276 } 277 278 auto opCast(string)() const { return toString(); } 279 280 unittest 281 { 282 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 283 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 284 285 Document doc = "<p>"; 286 Document doc2; 287 doc2 = doc; 288 289 assert(doc == "<html><head></head><body><p></p></body></html>"); 290 assert(doc2 == "<html><head></head><body><p></p></body></html>"); 291 292 doc = Document("<b>"); 293 assert(doc == "<html><head></head><body><b></b></body></html>"); 294 assert(doc2 == "<html><head></head><body><p></p></body></html>"); 295 296 doc = "<i>"; 297 assert(doc == "<html><head></head><body><i></i></body></html>"); 298 assert(doc2 == "<html><head></head><body><p></p></body></html>"); 299 } 300 301 unittest 302 { 303 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 304 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 305 306 Document doc = "<html><p>"; 307 assert(doc == "<html><head></head><body><p></p></body></html>"); 308 309 doc = null; 310 doc = "<a>"; 311 assert(doc == "<html><head></head><body><a></a></body></html>"); 312 } 313 314 unittest 315 { 316 Document doc = Document("<html>"); 317 assert(doc.isValid); 318 assert(doc != null); 319 assert(Document.RefCounter.refs.length == 1); 320 assert(Document.RefCounter.refs[doc.payload] == 1); 321 322 DocumentPayload* docPayload = doc.payload; 323 324 Document doc2 = Document("<html><p>"); 325 assert(Document.RefCounter.refs.length == 2); 326 assert(Document.RefCounter.refs[doc2.payload] == 1); 327 328 assert(docPayload in Document.RefCounter.refs); 329 doc = doc2; 330 assert(docPayload !in Document.RefCounter.refs); 331 332 assert(Document.RefCounter.refs.length == 1); 333 assert(Document.RefCounter.refs[doc2.payload] == 2); 334 335 assert(doc == doc2); 336 337 assert(doc.isValid); 338 339 auto r = doc.byTagName("p"); 340 assert(r.empty == false); 341 r.destroy(); 342 343 344 import core.memory : GC; 345 GC.collect(); 346 347 doc = null; 348 assert(doc.isValid == false); 349 assert(doc == null); 350 351 doc2 = null; 352 } 353 354 ~this() { 355 if (payload == null) return; 356 RefCounter.remove(payload); 357 } 358 359 /// Create a document fragment from source 360 Element fragment(const string html) 361 { 362 Element element = createElement("div"); 363 auto node = cast(lxb_dom_element_t*)CallWithLexborString!lxb_html_document_parse_fragment(payload.document, element.element, html); 364 return Element(payload, node); 365 } 366 367 /// 368 unittest 369 { 370 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 371 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 372 373 Document doc = Document("<html>"); 374 Element e = doc.fragment("<p><b>hello</b>world"); 375 376 auto range = e.descendants(); 377 assert(range.front.name == "p"); 378 range.destroy(); 379 380 import core.memory : GC; 381 GC.collect(); 382 } 383 private: 384 385 string titleImpl(bool raw = false) 386 { 387 size_t length; 388 const(ubyte)* res; 389 390 if (!raw) res = lxb_html_document_title(payload.document, &length); 391 else res = lxb_html_document_title_raw(payload.document, &length); 392 393 return cast(string)res[0..length]; 394 } 395 396 DocumentPayload* payload; 397 398 struct DocumentPayload 399 { 400 lxb_html_document_t* document = null; 401 lxb_css_parser_t* parser = null; 402 } 403 404 struct RefCounter 405 { 406 static void add(DocumentPayload* resource) 407 { 408 //trace("ADD +1 document: ", resource); 409 if (resource == null) return; 410 411 auto r = (resource in refs); 412 if (r is null) refs[resource] = 1; 413 else atomicFetchAdd(*r, 1); 414 } 415 416 static void remove(DocumentPayload* resource) 417 { 418 //trace("REMOVE -1 document: ", resource); 419 if (resource == null) return; 420 421 size_t pre = atomicFetchSub(refs[resource], 1); 422 423 if (pre == 1) 424 { 425 //trace("DELETED document: ", resource); 426 lxb_html_document_destroy(resource.document); 427 lxb_css_parser_destroy(resource.parser, true); 428 refs.remove(resource); 429 } 430 } 431 432 private: 433 __gshared size_t[DocumentPayload*] refs; 434 } 435 436 } 437 438 439 /// A html element 440 struct Element 441 { 442 /// A simple key/value struct representing a html attribute 443 struct Attribute 444 { 445 string name; 446 string value; 447 } 448 449 private void onlyValidElements(string fname = __FUNCTION__) const 450 { 451 if (this.element == null) 452 throw new Exception("Can't call `" ~ fname ~ "` for an invalid/uninitialized node"); 453 } 454 455 private void onlyRealElements(string fname = __FUNCTION__) 456 { 457 onlyValidElements(fname); 458 459 if (this.element.node.type != lxb_dom_node_type_t.LXB_DOM_NODE_TYPE_ELEMENT) 460 throw new Exception("Can't call `" ~ fname ~ "` for a node with type " ~ name()); 461 } 462 463 /// The owner of this element 464 @property Document owner() 465 { 466 Document doc; 467 doc.payload = docPayload; 468 Document.RefCounter.add(doc.payload); 469 470 return doc; 471 } 472 473 /// Is this element empty? 474 @property bool isEmpty() { onlyValidElements(); return lxb_dom_node_is_empty(&element.node); } 475 476 unittest 477 { 478 version(DigitalMars) scope(exit) assert(Document.RefCounter.refs.length == 0); 479 version(DigitalMars) scope(exit) assert(Element.RefCounter.refs.length == 0); 480 481 Document doc = "<b>hello</b><br><b>"; 482 import std.array; 483 import std.algorithm : map; 484 assert(doc.body.children.map!(x => x.isEmpty()).array == [false, true, true]); 485 } 486 487 /// Is this element valid? 488 @property pure @safe nothrow bool isValid() const { return element != null; } 489 490 /// Return a lazy range of attributes for this element 491 @property auto attributes() 492 { 493 onlyRealElements(); 494 495 class AttributeRange 496 { 497 AttributeRange save() 498 { 499 auto newRange = new AttributeRange(docPayload, element); 500 newRange.current = current; 501 newRange.destroyed = destroyed; 502 return newRange; 503 } 504 505 Attribute front() { return Attribute(ReturnLexborString!lxb_dom_attr_local_name_noi(current), ReturnLexborString!lxb_dom_attr_value_noi(current)); } 506 void popFront() { current = lxb_dom_element_next_attribute_noi(current); if (empty) unref(); } 507 508 @property bool empty() { return current is null; } 509 510 ~this() { 511 unref(); 512 } 513 514 private: 515 516 void unref() { 517 518 if (!destroyed) 519 { 520 Element.RefCounter.remove(element); 521 Document.RefCounter.remove(docPayload); 522 } 523 524 destroyed = true; 525 } 526 527 @disable this(); 528 529 this(Document.DocumentPayload* docPayload, lxb_dom_element_t* e) 530 { 531 this.docPayload = docPayload; 532 element = e; 533 current = lxb_dom_element_first_attribute_noi(element); 534 535 if (!empty) 536 { 537 Document.RefCounter.add(docPayload); 538 Element.RefCounter.add(e); 539 } 540 else destroyed = true; 541 } 542 543 544 bool destroyed = false; 545 lxb_dom_attr_t* current = null; 546 547 Document.DocumentPayload* docPayload; 548 lxb_dom_element_t* element; 549 } 550 551 return new AttributeRange(docPayload, element); 552 } 553 554 /// Check if an attribute exists 555 @property bool hasAttribute(string attr) { onlyRealElements(); return CallWithLexborString!lxb_dom_element_has_attribute(element, attr); } 556 557 /// Remove an attribute from this element 558 @property void removeAttribute(string attr) { onlyRealElements(); CallWithLexborString!lxb_dom_element_remove_attribute(element, attr); } 559 560 /// Set an attribute for this element 561 @property void setAttribute(string name, string value) { onlyRealElements(); lxb_dom_element_set_attribute(element, name.representation.ptr, name.representation.length, value.representation.ptr, value.representation.length); } 562 563 /// Get an attribute 564 @property string getAttribute(string attr) { onlyRealElements(); return ReturnLexborString!(lxb_dom_element_get_attribute)(element, attr.representation.ptr, attr.representation.length); } 565 566 /// The id of this element (if present) 567 @property string id() 568 { 569 onlyRealElements(); 570 if (element.attr_id == null) return string.init; 571 return ReturnLexborString!lxb_dom_attr_value_noi(element.attr_id); 572 } 573 574 /// All the classes of this element 575 @property auto classes() 576 { 577 onlyRealElements(); 578 if (element.attr_class == null) return string.init.splitter(" "); 579 return ReturnLexborString!lxb_dom_attr_value_noi(element.attr_class).splitter(" "); 580 } 581 582 unittest 583 { 584 import std.array; 585 586 Document doc = Document(`<html><p class="hello world" id="world" style><a>`); 587 assert(doc.byId("world").attributes.array == [Attribute("class", "hello world"), Attribute("id", "world"), Attribute("style", "")]); 588 assert(doc.byTagName("a").front.attributes.empty); 589 590 auto p = doc.byTagName("p").front; 591 auto a = doc.byTagName("a").front; 592 593 assert(p.hasAttribute("style")); 594 assert(p.hasAttribute("href") == false); 595 assert(p.classes.array == ["hello", "world"]); 596 597 assert(p.id == "world"); 598 599 a.setAttribute("href", "url"); 600 601 assert(a.hasAttribute("href")); 602 assert(a.getAttribute("href") == "url"); 603 604 p.removeAttribute("id"); 605 p.removeAttribute("class"); 606 607 assert(p.hasAttribute("style")); 608 assert(!p.hasAttribute("id")); 609 assert(!p.hasAttribute("class")); 610 assert(p.id.length == 0); 611 assert(p.classes.array.length == 0); 612 } 613 614 bool opEquals(const typeof(null) o) const @safe nothrow pure { return !isValid; } 615 bool opEquals(E = Element)(const auto ref E e) const 616 { 617 import std.traits : isSomeString; 618 static if (isSomeString!E) return e == this.toString; 619 else return e.element == this.element; 620 } 621 622 623 /// Tag 624 @property string name() { onlyValidElements(); return ReturnLexborString!lxb_dom_element_local_name(element); } 625 626 unittest 627 { 628 { 629 Document doc = Document(`<!doctype html><html><body id="bo"/>`); 630 Element a; 631 Element b = doc.body; 632 Element c = doc.head; 633 Element d = b; 634 Element e = a; 635 636 assert(a.isValid == false); 637 assert(a == e); 638 assert(a == null); 639 640 assert(b != a); 641 assert(b != c); 642 assert(d == b); 643 644 Element f = doc.byTagName("body").front; 645 Element g = doc.byId("bo"); 646 647 assert(b == f); 648 assert(b == g); 649 650 assert(g.name == "body"); 651 assert(c.name == "head"); 652 } 653 } 654 655 alias clone = dup; 656 /// Clone this element 657 Element dup(bool deep = true) 658 { 659 onlyValidElements(); 660 661 auto newElement = lxb_dom_element_interface_clone(&(docPayload.document.dom_document), element); 662 auto e = Element(docPayload, newElement); 663 664 // FIXME: not the best option 665 if(deep) 666 e.innerHTML = innerHTML; 667 668 return e; 669 } 670 671 /// 672 unittest 673 { 674 import std.array; 675 import std.algorithm : map; 676 677 Document doc = Document(`<html><p data-a="a" data-b="b"><i></i><b></b>`); 678 Element e = doc.byTagName("p").front; 679 Element f = e; 680 Element g = e.dup(false); 681 Element h = g; 682 683 assert(e!=g); 684 assert(f!=g); 685 assert(g==h); 686 687 assert(g.name == "p"); 688 assert(g.attributes.array == [Attribute("data-a", "a"), Attribute("data-b", "b")]); 689 assert(g.descendants.map!(x=>x.name).array == []); 690 691 g = e.clone(true); 692 assert(g.name == "p"); 693 assert(g.attributes.array == [Attribute("data-a", "a"), Attribute("data-b", "b")]); 694 assert(g.descendants.map!(x=>x.name).array == ["i", "b"]); 695 } 696 697 /// Prepend an element 698 void prependSibling(E = Element)(auto ref E el) 699 { 700 import std.array : array; 701 onlyValidElements(); 702 703 import std.traits; 704 static if (is(E == FragmentString)) 705 { 706 Element root = owner.fragment(el); 707 foreach(ref e; root.children(false).array) 708 prependSibling(e); 709 } 710 else 711 { 712 static if (isSomeString!E) 713 { 714 Element e = owner.createElement("#text"); 715 e.innerText = el; 716 } 717 else alias e = el; 718 719 e.remove(); 720 lxb_dom_node_insert_before(&(element.node), &(e.element.node)); 721 } 722 } 723 724 /// Append an element 725 void appendSibling(E = Element)(auto ref E el) 726 { 727 import std.array : array; 728 onlyValidElements(); 729 730 import std.traits; 731 static if (is(E == FragmentString)) 732 { 733 Element root = owner.fragment(el); 734 foreach(ref e; root.children!(VisitOrder.Reverse)(false).array) 735 appendSibling(e); 736 } 737 else 738 { 739 static if (isSomeString!E) 740 { 741 Element e = owner.createElement("#text"); 742 e.innerText = el; 743 } 744 else alias e = el; 745 746 e.remove(); 747 lxb_dom_node_insert_after(&(element.node), &(e.element.node)); 748 } 749 } 750 751 /// Put a new child in the first position 752 void prependChild(E = Element)(auto ref E el) 753 { 754 import std.array : array; 755 onlyRealElements(); 756 757 import std.traits; 758 static if (is(E == FragmentString)) 759 { 760 Element root = owner.fragment(el); 761 foreach(ref e; root.children!(VisitOrder.Reverse)(false).array) 762 prependChild(e); 763 } 764 else 765 { 766 static if (isSomeString!E) 767 { 768 Element e = owner.createElement("#text"); 769 e.innerText = el; 770 } 771 else alias e = el; 772 773 e.remove(); 774 auto last = element.node.first_child; 775 if (last == null) lxb_dom_node_insert_child(&(element.node), &(e.element.node)); 776 else lxb_dom_node_insert_before(last, &(e.element.node)); 777 } 778 } 779 780 /// Put a new child in the last position 781 void appendChild(E = Element)(auto ref E el) 782 { 783 import std.array : array; 784 onlyRealElements(); 785 786 import std.traits; 787 788 static if (is(E == FragmentString)) 789 { 790 Element root = owner.fragment(el); 791 foreach(ref e; root.children(false).array) 792 appendChild(e); 793 } 794 else 795 { 796 static if (isSomeString!E) 797 { 798 Element e = owner.createElement("#text"); 799 e.innerText = el; 800 } 801 else alias e = el; 802 e.remove(); 803 auto last = element.node.last_child; 804 if (last == null) lxb_dom_node_insert_child(&(element.node), &(e.element.node)); 805 else lxb_dom_node_insert_after(last, &(e.element.node)); 806 } 807 } 808 809 /// 810 unittest 811 { 812 Document doc = "<p>"; 813 Element p = doc.byTagName("p").front; 814 p.prependSibling("<a>first-before</a><a><b>second</b></a>".asFragment); 815 p.appendSibling("<a>first-after</a><a><b>second</b></a>".asFragment); 816 assert(doc.toString() == `<html><head></head><body><a>first-before</a><a><b>second</b></a><p></p><a>first-after</a><a><b>second</b></a></body></html>`); 817 } 818 819 /// 820 unittest 821 { 822 Document doc = `<p id="start">`; 823 824 Element p = doc.byTagName("p").front; 825 p.appendChild("<p>post</p><p>post1</p>".asFragment); 826 p.prependChild("<p>pre</p><p>pre1</p>".asFragment); 827 p.appendChild("<p>text</p>"); 828 829 assert(doc.body.toString == `<body><p id="start"><p>pre</p><p>pre1</p><p>post</p><p>post1</p><p>text</p></p></body>`); 830 } 831 832 833 unittest 834 { 835 Document doc = "<p>"; 836 Element e = doc.body.children.front; 837 e.appendChild("world"); 838 e.prependChild("hello"); 839 e.appendChild("!"); 840 e.prependSibling("before"); 841 e.appendSibling("after"); 842 assert(doc == "<html><head></head><body>before<p>helloworld!</p>after</body></html>"); 843 } 844 845 void opOpAssign(string op)(auto ref Element e) if (op == "~") { onlyRealElements(); appendChild(e); } 846 847 /// Remove this element from the document 848 bool remove() 849 { 850 onlyValidElements(); 851 852 if (element.node.parent == null) return false; 853 lxb_dom_node_remove(&(element.node)); 854 return true; 855 } 856 857 unittest 858 { 859 Document doc = "<p>"; 860 Element bod = doc.body; 861 Element other = doc.createElement("a"); 862 bod ~= other; 863 bod ~= doc.createElement("a"); 864 bod.appendChild(doc.createElement("b")); 865 bod.prependChild(doc.createElement("i")); 866 assert(doc.body.toString == "<body><i></i><p></p><a></a><a></a><b></b></body>"); 867 } 868 869 unittest 870 { 871 Document doc = "<p><b>hello"; 872 auto comment = doc.createElement("!--"); 873 comment.innerText = "comment"; 874 doc.byTagName("b").front.prependSibling(comment); 875 doc.byTagName("b").front.appendSibling(comment); 876 assert(!doc.byTagName("!--").empty); 877 assert(doc.byTagName("p").front.toString == `<p><b>hello</b><!--comment--></p>`); 878 } 879 880 unittest 881 { 882 Document doc = Document("<html>"); 883 884 Element p = doc.createElement("p"); 885 doc.body.appendChild(p); 886 887 Element b = doc.createElement("b"); 888 Element i = doc.createElement("i"); 889 Element a = doc.createElement("a"); 890 p.prependSibling(b); 891 p.appendSibling(i); 892 p.appendChild(a); 893 894 assert(doc.toString == `<html><head></head><body><b></b><p><a></a></p><i></i></body></html>`); 895 } 896 897 unittest 898 { 899 Document doc = "<html>"; 900 auto e = doc.createElement("a"); 901 assert (doc == "<html><head></head><body></body></html>"); 902 903 assert(e.remove() == false); 904 905 doc.body.appendChild(e); 906 assert (doc == "<html><head></head><body><a></a></body></html>"); 907 908 assert(e.remove() == true); 909 assert(doc == "<html><head></head><body></body></html>"); 910 911 assert(e.remove() == false); 912 assert(doc == "<html><head></head><body></body></html>"); 913 914 assert(e == "<a></a>"); 915 } 916 917 918 /// Replace this element with another one 919 void replaceWith(E = Element)(auto ref E el) 920 { 921 onlyValidElements(); 922 923 assert(el != this); 924 925 import std.traits : isSomeString; 926 927 static if (isSomeString!E) 928 { 929 Element e = owner.createElement("#text"); 930 e.innerText = el; 931 } 932 else alias e = el; 933 934 e.remove(); 935 prependSibling(e); 936 remove(); 937 } 938 939 /// Copy another element here 940 void copyFrom(E = Element)(auto ref E e, bool deep = true) 941 { 942 onlyRealElements(); 943 944 assert(e != this); 945 946 import std.algorithm : map; 947 import std.array : array; 948 auto attr = attributes.map!(x => x.name).array; 949 950 foreach(a; attr) 951 removeAttribute(a); 952 953 lxb_dom_element_interface_copy(element, e.element); 954 955 // FIXME: not the best option 956 if(deep) 957 innerHTML = e.innerHTML; 958 } 959 960 961 unittest 962 { 963 Document d = `<p id="hello"></p><a>`; 964 965 auto p = d.byId("hello"); 966 auto a = d.byTagName("a").front; 967 968 p.copyFrom(a); 969 970 assert(p.attributes.empty); 971 } 972 973 unittest 974 { 975 Document doc = `<html><p class="p1"><b><p class="p2"><i>`; 976 977 Element p1 = doc.byClass("p1").frontOrThrow; 978 Element p1Copy = p1; 979 980 Element p2 = doc.byClass("p2").frontOrThrow; 981 Element b = p1.descendants.frontOrThrow; 982 Element i = p2.descendants.frontOrThrow; 983 984 i.replaceWith(b); 985 assert(p1.descendants.empty == true); 986 assert(p2.descendants.frontOrThrow.name == "b"); 987 assert(p2.descendants.frontOrThrow == b); 988 989 p1.copyFrom(p2); 990 assert(p1.classes.frontOrThrow == "p2"); 991 assert(p1Copy.classes.frontOrThrow == "p2"); 992 assert(p1.descendants.frontOrThrow.name == "b"); 993 assert(p1.descendants.frontOrThrow != b); 994 assert(p2.descendants.frontOrThrow == b); 995 996 } 997 998 /// Set the html content of this element 999 @property void innerHTML(string html) { onlyValidElements(); CallWithLexborString!lxb_html_element_inner_html_set(cast(lxb_html_element_t*)element, html); } 1000 1001 /// Get the content of this element 1002 @property string innerHTML() 1003 { 1004 onlyRealElements(); 1005 1006 extern(C) lxb_status_t cb(const lxb_char_t *data, size_t len, void *ctx) 1007 { 1008 *(cast(string*)ctx) ~= cast(string)data[0..len]; 1009 return lexbor_status_t.LXB_STATUS_OK; 1010 } 1011 1012 string output; 1013 lxb_html_serialize_deep_cb(&(element.node), &cb, &output); 1014 return output; 1015 } 1016 1017 /// Set the inner text of this element (replacing html) 1018 @property string innerText() { onlyValidElements(); return ReturnLexborString!lxb_dom_node_text_content(&(element.node)); } 1019 1020 /// Get the inner text of this element (ignoring html tags) 1021 @property void innerText(string text) { onlyValidElements(); CallWithLexborString!lxb_dom_node_text_content_set(&(element.node), text); } 1022 1023 /// 1024 unittest 1025 { 1026 import std.array; 1027 1028 Document doc = Document("<html><p>"); 1029 Element p = doc.byTagName("p").front; 1030 1031 assert(p.descendants.empty); 1032 p.innerHTML = `<a href="uri">link</a>`; 1033 assert(p.descendants.front.name == "a"); 1034 assert(p.byTagName("a").front.innerText == "link"); 1035 1036 p.byTagName("a").front.innerText = "hello"; 1037 assert(p.byTagName("a").front.innerText == "hello"); 1038 1039 p.innerText = "plain text"; 1040 1041 assert(p.descendants(true).front.name == "#text"); 1042 assert(p.innerText == "plain text"); 1043 assert(p.byTagName("a").empty); 1044 1045 } 1046 1047 /// See_also: Document.byId 1048 Element byId(string id) 1049 { 1050 onlyRealElements(); 1051 1052 auto r = descendants() 1053 .filter!(c => c.element.attr_id != null) 1054 .filter!((c){ 1055 size_t len; 1056 auto s = lxb_dom_attr_value_noi(c.element.attr_id, &len); 1057 return (cast(string)s[0..len] == id); 1058 }); 1059 1060 scope(exit) r.destroy(); 1061 1062 if (r.empty) throw new Exception("Element not found"); 1063 else return r.front; 1064 } 1065 1066 /// See_also: Document.byClass 1067 auto byClass(string name) 1068 { 1069 onlyRealElements(); 1070 1071 return descendants() 1072 .filter!(c => c.element.attr_class != null) 1073 .filter!((c){ 1074 size_t len; 1075 auto s = lxb_dom_attr_value_noi(c.element.attr_class, &len); 1076 return ((cast(string)s[0..len]).splitter(" ").canFind(name)); 1077 }); 1078 } 1079 1080 /// See_also: Document.byTagName 1081 auto byTagName(string name) 1082 { 1083 onlyRealElements(); 1084 1085 return descendants(true) 1086 .filter!((c){ 1087 return name == ReturnLexborString!lxb_dom_element_local_name(c.element); 1088 }); 1089 } 1090 1091 /// See_also: Document.byComment 1092 auto byComment(string comment, bool stripSpaces = true) 1093 { 1094 import std.string : strip; 1095 onlyRealElements(); 1096 1097 return byTagName("!--").filter!(x => stripSpaces?(x.innerText.strip == comment.strip):(x.innerText == comment)); 1098 } 1099 1100 /// See_also: Document.bySelector 1101 auto bySelector(string selector) { onlyRealElements(); return new SelectorElementRange(docPayload, element, selector); } 1102 1103 /// 1104 unittest 1105 { 1106 Document doc = Document( 1107 `<html><body> 1108 <ul><li>one</li><li id="this">two</li></ul> 1109 <h4>title</h4> 1110 <ul><li>three</li><li>four</li><li>five</li></ul> 1111 `); 1112 1113 import std.array; 1114 Element[] res = doc.bySelector("h4+ul li:nth-of-type(2), #this").array; 1115 1116 assert(res.length == 2); 1117 assert(res[0].innerText == "four"); 1118 assert(res[1].innerText == "two"); 1119 } 1120 1121 unittest 1122 { 1123 Document doc = "<div><!--hello--><p></p></div>"; 1124 Element e = doc.byComment("hello").front; 1125 assert(e.next.name == "p"); 1126 assert(doc.byComment("hell").frontOrInit == null); 1127 1128 } 1129 1130 unittest 1131 { 1132 Document doc = Document(`<html><body><p id="test"/><p id="another" class="hello world">this is a text`); 1133 1134 { 1135 import std.exception : assertThrown; 1136 1137 Element e = doc.byId("test"); 1138 assert(e.isValid); 1139 assert(e.name == "p"); 1140 assert(e.id == "test"); 1141 assertThrown(doc.byId("blah")); 1142 } 1143 1144 import std.array; 1145 1146 { 1147 Element[] res = doc.byClass("world").array; 1148 assert(res.length == 1); 1149 assert(res[0].id == "another"); 1150 assert(res[0].name == "p"); 1151 } 1152 1153 { 1154 Element[] res = doc.byTagName("p").array; 1155 assert(res.length == 2); 1156 assert(res[0].id == "test"); 1157 assert(res[1].id == "another"); 1158 } 1159 } 1160 1161 1162 1163 /// The next element in the document 1164 @property Element next(bool includeAllElements = false) 1165 { 1166 onlyValidElements(); 1167 lxb_dom_node* el = element.node.next; 1168 1169 with(lxb_dom_node_type_t) 1170 { 1171 1172 while(el != null) 1173 { 1174 if (cast(int)el.type == cast(int)LXB_DOM_NODE_TYPE_ELEMENT) 1175 break; 1176 1177 if (includeAllElements) 1178 break; 1179 1180 el = el.next; 1181 } 1182 } 1183 1184 return Element(docPayload, cast(lxb_dom_element_t*)el); 1185 } 1186 1187 /// The previous element in the document 1188 @property Element prev(bool includeAllElements = false) 1189 { 1190 onlyValidElements(); 1191 lxb_dom_node* el = element.node.prev; 1192 1193 with(lxb_dom_node_type_t) 1194 { 1195 1196 while(el != null) 1197 { 1198 if (cast(int)el.type == cast(int)LXB_DOM_NODE_TYPE_ELEMENT) 1199 break; 1200 1201 if (includeAllElements) 1202 break; 1203 1204 el = el.prev; 1205 } 1206 } 1207 1208 return Element(docPayload, cast(lxb_dom_element_t*)el); 1209 } 1210 1211 /// The parent element 1212 @property Element parent() { onlyValidElements(); return Element(docPayload,cast(lxb_dom_element_t*) element.node.parent); } 1213 1214 unittest 1215 { 1216 Document d = `<div><p></p><!--hmm--><i></i><!--ohh--></div>`; 1217 1218 Element p = d.byTagName("p").front; 1219 Element i = d.byTagName("i").front; 1220 1221 assert(i.prev == p); 1222 assert(p.next == i); 1223 assert(p.next(true).innerText == "hmm"); 1224 assert(i.prev(true).innerText == "hmm"); 1225 1226 assert(i.next == null); 1227 assert(i.next(true).innerText == "ohh"); 1228 } 1229 1230 /// The first child 1231 @property Element firstChild(bool includeAllElements = false) 1232 { 1233 onlyValidElements(); 1234 auto el = element.node.first_child; 1235 1236 with(lxb_dom_node_type_t) 1237 { 1238 1239 while(el != null) 1240 { 1241 if (cast(int)el.type == cast(int)LXB_DOM_NODE_TYPE_ELEMENT) 1242 break; 1243 1244 if (includeAllElements) 1245 break; 1246 1247 el = el.next; 1248 } 1249 } 1250 1251 return Element(docPayload, cast(lxb_dom_element_t*)el); 1252 } 1253 1254 /// The last child 1255 @property Element lastChild(bool includeAllElements = false) 1256 { 1257 onlyValidElements(); 1258 auto el = element.node.last_child; 1259 1260 with(lxb_dom_node_type_t) 1261 { 1262 while(el != null) 1263 { 1264 if (cast(int)el.type == cast(int)LXB_DOM_NODE_TYPE_ELEMENT) 1265 break; 1266 1267 if (includeAllElements) 1268 break; 1269 1270 el = el.prev; 1271 } 1272 } 1273 1274 return Element(docPayload, cast(lxb_dom_element_t*)el); 1275 } 1276 1277 unittest 1278 { 1279 Document d = "<p><!--hello--><b></b>text</p>"; 1280 1281 assert(d.body.firstChild.firstChild == "<b></b>"); 1282 assert(d.body.firstChild.lastChild == "<b></b>"); 1283 assert(d.body.firstChild.firstChild(true) == "<!--hello-->"); 1284 assert(d.body.firstChild.lastChild(true) == "text"); 1285 } 1286 1287 /// All the children contained in this element. (deep search) 1288 @property auto descendants(VisitOrder order = VisitOrder.Normal)(bool returnAllElements = false) { onlyRealElements(); return new ChildrenElementRange!order(docPayload, element, true, returnAllElements); } 1289 /// All the children contained in this element. (non-deep search) 1290 @property auto children(VisitOrder order = VisitOrder.Normal)(bool returnAllElements = false) { onlyRealElements(); return new ChildrenElementRange!order(docPayload, element, false, returnAllElements); } 1291 1292 /// 1293 unittest 1294 { 1295 Document d = "<p><b>test</b>test2</p>"; 1296 1297 import std.array; 1298 Element[] c = d.body.firstChild.children(true).array; 1299 assert(c.length == 2); 1300 assert(c[0] == "<b>test</b>"); 1301 assert(c[1] == "test2"); 1302 } 1303 1304 unittest 1305 { 1306 Document d = "<p><!--comment-->text"; 1307 1308 Element p = d.byTagName("p").frontOrThrow; 1309 1310 assert(p.children.empty == true); 1311 assert(p.children(false).empty == true); 1312 assert(p.children(true).empty == false); 1313 1314 } 1315 1316 alias canFind = contains; 1317 1318 /// 1319 @property bool contains(E = Element)(auto ref E e, bool deep = true) 1320 { 1321 onlyRealElements(); 1322 1323 if (e == this) return false; 1324 1325 Element tmp = e; 1326 while(tmp != null) 1327 { 1328 if (tmp == this) return true; 1329 else if (!deep) return false; 1330 else tmp = tmp.parent(); 1331 } 1332 1333 return false; 1334 } 1335 1336 /// 1337 @property bool isAncestorOf(E = element)(auto ref E e) { onlyRealElements(); return this.contains(e); } 1338 /// 1339 @property bool isDescendantOf(E = element)(auto ref E e) { return e.contains(this); } 1340 1341 unittest 1342 { 1343 import std.stdio; 1344 import std.exception : assertThrown; 1345 1346 Document d = "<html><p><!--hey<b>--><a>hello</a>"; 1347 Element p = d.byTagName("p").front; 1348 1349 Element c = p.children(true).front; 1350 assert(c.innerText == "hey<b>"); 1351 assertThrown(c.hasAttribute("hello") == false); 1352 assertThrown(c.children.empty); 1353 assertThrown(c.byTagName("b").empty); 1354 assert(c.name == "!--"); 1355 1356 assert(p.contains(c)); 1357 assert(p.canFind(c)); 1358 1359 assert(!d.head.canFind(c)); 1360 } 1361 1362 unittest 1363 { 1364 import std.array; 1365 Document doc = Document("<html><p><p><p><p>"); 1366 assert(doc.body.descendants.array.length == 4); 1367 1368 auto range = doc.body.descendants; 1369 auto original = range.save; 1370 1371 range.popFront; 1372 1373 auto saved = range.save; 1374 1375 range.popFront; 1376 1377 assert(range.array.length == 2); 1378 assert(saved.array.length == 3); 1379 assert(original.array.length == 4); 1380 1381 assert(doc.body.descendants.array == doc.body.children.array); 1382 } 1383 1384 unittest 1385 { 1386 import std.array; 1387 1388 Document d = `<html><p><a><b>`; 1389 1390 assert(d.body.children.array.length == 1); 1391 assert(d.body.descendants.array.length == 3); 1392 assert(d.body.children.front.toString == d.body.descendants.front.toString); 1393 assert(d.body.descendants.array[0].name == "p"); 1394 assert(d.body.descendants.array[1].name == "a"); 1395 1396 auto b = d.byTagName("b").front; 1397 auto p = d.byTagName("p").front; 1398 assert(p.contains(b)); 1399 assert(!p.contains(b, false)); 1400 1401 assert(p.isAncestorOf(b)); 1402 assert(b.isDescendantOf(p)); 1403 assert(!p.isDescendantOf(b)); 1404 assert(!b.isAncestorOf(p)); 1405 1406 assert(!p.isDescendantOf(p)); 1407 assert(!b.isAncestorOf(b)); 1408 1409 1410 } 1411 1412 1413 unittest 1414 { 1415 import std.stdio; 1416 import std.algorithm : map; 1417 import std.array; 1418 1419 Document d = 1420 `<p> 1421 <b> 1422 <i> 1423 </i> 1424 <a> 1425 </a> 1426 </b> 1427 </p> 1428 <br>`; 1429 1430 { 1431 string trip = d.body.descendants.map!(x => x.name).join("->"); 1432 string tripReverse = d.body.descendants!(VisitOrder.Reverse).map!(x => x.name).join("->"); 1433 1434 assert(trip == "p->b->i->a->br"); 1435 assert(tripReverse == "br->p->b->a->i"); 1436 } 1437 1438 { 1439 string trip = d.body.children.map!(x => x.name).join("->"); 1440 string tripReverse = d.body.children!(VisitOrder.Reverse).map!(x => x.name).join("->"); 1441 1442 assert(trip == "p->br"); 1443 assert(tripReverse == "br->p"); 1444 } 1445 1446 } 1447 1448 /// 1449 @property string outerHTML() { onlyValidElements(); return toString(); } 1450 /// 1451 @property void outerHTML(string html) 1452 { 1453 onlyValidElements(); 1454 Element fragment = owner.fragment(html); 1455 1456 import std.array; 1457 foreach(ref c; fragment.children.array) 1458 prependSibling(c); 1459 1460 remove(); 1461 } 1462 1463 /// 1464 unittest 1465 { 1466 Document d = "<html><a><p>"; 1467 1468 auto p = d.byTagName("p").front; 1469 p.outerHTML = "<div><a>"; 1470 1471 assert(p.name == "p"); 1472 assert(d.body.descendants.front.name == "a"); 1473 } 1474 1475 /// 1476 string toString(bool deep = true) const 1477 { 1478 onlyValidElements(); 1479 1480 extern(C) lxb_status_t cb(const lxb_char_t *data, size_t len, void *ctx) 1481 { 1482 *(cast(string*)ctx) ~= cast(string)data[0..len]; 1483 return lexbor_status_t.LXB_STATUS_OK; 1484 } 1485 1486 string output; 1487 1488 1489 if (deep) lxb_html_serialize_tree_cb(cast(lxb_dom_node*)&(element.node), &cb, &output); 1490 else lxb_html_serialize_cb(cast(lxb_dom_node*)&(element.node), &cb, &output); 1491 1492 return output; 1493 } 1494 1495 this(ref return scope typeof(this) rhs) 1496 { 1497 if (rhs.element == null) return; 1498 element = rhs.element; 1499 docPayload = rhs.docPayload; 1500 1501 Element.RefCounter.add(element); 1502 Document.RefCounter.add(docPayload); 1503 } 1504 1505 1506 auto opAssign(typeof(null) n) 1507 { 1508 if (element != null) 1509 Element.RefCounter.remove(element); 1510 1511 if (docPayload != null) 1512 Document.RefCounter.remove(docPayload); 1513 1514 docPayload = null; 1515 element = null; 1516 } 1517 1518 auto opAssign(const string html) 1519 { 1520 if (owner == null) 1521 throw new Exception("Can't set html for a null element"); 1522 1523 Element fragment = owner.fragment(html); 1524 1525 auto cld = fragment.children; 1526 Element first = cld.frontOrInit(); 1527 1528 if (first == null) 1529 throw new Exception("Can't assign: invalid fragment"); 1530 1531 cld.popFront(); 1532 1533 if (!cld.empty) 1534 throw new Exception("Can't assign a fragment with more than one child"); 1535 1536 copyFrom(first, true); 1537 1538 } 1539 1540 auto opCast(string)() 1541 { 1542 onlyValidElements(); 1543 return toString(); 1544 } 1545 1546 unittest 1547 { 1548 import std.exception; 1549 Element e; 1550 assertThrown(e = `<a href="hmm.html>blah</a>`); 1551 } 1552 1553 unittest 1554 { 1555 Document d = "<p>"; 1556 Element e = d.byTagName("p").front; 1557 1558 string se = cast(string)e; 1559 string de = cast(string)d; 1560 1561 assert(se == "<p></p>"); 1562 assert(de == "<html><head></head><body><p></p></body></html>"); 1563 } 1564 1565 unittest 1566 { 1567 import std.exception : assertThrown; 1568 1569 Document d = "<p><a><b>"; 1570 assert(d == "<html><head></head><body><p><a><b></b></a></p></body></html>"); 1571 1572 Element e = d.byTagName("b").front; 1573 e = "<i><span>"; 1574 1575 assert(e == "<i><span></span></i>"); 1576 assert(d.body == "<body><p><a><i><span></span></i></a></p></body>"); 1577 assertThrown(e = "<i></i><b></b>"); 1578 assertThrown(e = ""); 1579 1580 e.outerHTML = "<b></b>"; 1581 assert(e == "<i><span></span></i>"); 1582 assert(d.body == "<body><p><a><b></b></a></p></body>"); 1583 } 1584 1585 auto opAssign(typeof(this) rhs) 1586 { 1587 auto oldPayload = docPayload; 1588 auto oldElement = element; 1589 1590 docPayload = rhs.docPayload; 1591 element = rhs.element; 1592 1593 if (docPayload != null) 1594 Document.RefCounter.add(docPayload); 1595 1596 if (element != null) 1597 Element.RefCounter.add(element); 1598 1599 if (oldElement != null) 1600 Element.RefCounter.add(oldElement); 1601 1602 if (oldPayload != null) 1603 Document.RefCounter.remove(oldPayload); 1604 1605 return this; 1606 } 1607 1608 ~this() { 1609 if (element == null) return; 1610 Element.RefCounter.remove(element); 1611 Document.RefCounter.remove(docPayload); 1612 } 1613 1614 private: 1615 Document.DocumentPayload* docPayload; 1616 1617 lxb_dom_element_t* element; 1618 1619 this(Document.DocumentPayload* docPayload, lxb_dom_element_t* element) 1620 { 1621 this.docPayload = docPayload; 1622 this.element = element; 1623 1624 Document.RefCounter.add(docPayload); 1625 Element.RefCounter.add(element); 1626 } 1627 1628 struct RefCounter 1629 { 1630 static void add(lxb_dom_element_t* resource) 1631 { 1632 //trace("ADD +1 element: ", resource); 1633 if (resource == null) return; 1634 1635 auto r = (resource in refs); 1636 if (r is null) refs[resource] = 1; 1637 else atomicFetchAdd(*r, 1); 1638 } 1639 1640 static void remove(lxb_dom_element_t* resource) 1641 { 1642 //trace("REMOVE -1 element: ", resource); 1643 if (resource == null) return; 1644 1645 size_t pre = atomicFetchSub(refs[resource], 1); 1646 1647 if (pre == 1) 1648 { 1649 //trace("DELETE REQ element: ", resource); 1650 if (resource.node.owner_document == null) 1651 { 1652 //trace("DELETED element: ", resource); 1653 lxb_dom_element_destroy(resource); 1654 } 1655 1656 refs.remove(resource); 1657 } 1658 } 1659 1660 private: 1661 __gshared size_t[lxb_dom_element_t*] refs; 1662 } 1663 1664 } 1665 1666 1667 class ChildrenElementRange(VisitOrder order = VisitOrder.Normal) 1668 { 1669 ChildrenElementRange save() 1670 { 1671 auto newRange = new ChildrenElementRange!order(docPayload, element, recursive, returnAllElements); 1672 newRange.current = current; 1673 newRange.destroyed = destroyed; 1674 return newRange; 1675 } 1676 1677 Element front() { 1678 return Element(docPayload, current); 1679 } 1680 1681 void popFront() 1682 { 1683 while(true) 1684 { 1685 static if (order == VisitOrder.Reverse) 1686 { 1687 auto child = current.node.last_child; 1688 auto next = current.node.prev; 1689 } 1690 else 1691 { 1692 auto child = current.node.first_child; 1693 auto next = current.node.next; 1694 } 1695 1696 if (recursive && child != null) current = cast(lxb_dom_element*)(child); 1697 else if (next != null) current = cast(lxb_dom_element*)(next); 1698 else 1699 { 1700 while(true) 1701 { 1702 if (cast(lxb_dom_element*)current.node.parent == element) { current = null; break; } 1703 else 1704 { 1705 // Node removed during browsing, exit 1706 if (current.node.parent == null) 1707 { 1708 current = null; 1709 break; 1710 } 1711 1712 static if (order == VisitOrder.Reverse) auto candidate = cast(lxb_dom_element*)(current.node.parent.prev); 1713 else auto candidate = cast(lxb_dom_element*)(current.node.parent.next); 1714 1715 if (candidate == null) current = cast(lxb_dom_element*)current.node.parent; 1716 else 1717 { 1718 current = candidate; 1719 break; 1720 } 1721 } 1722 } 1723 } 1724 1725 if (current == null) break; 1726 auto type = cast(int)current.node.type; 1727 1728 if (type == cast(int)lxb_dom_node_type_t.LXB_DOM_NODE_TYPE_ELEMENT) break; 1729 else if (returnAllElements) break; 1730 } 1731 1732 if (empty) unref(); 1733 } 1734 1735 @property bool empty() 1736 { 1737 return current == null; 1738 } 1739 1740 ~this() { 1741 unref(); 1742 } 1743 1744 private: 1745 1746 void unref() 1747 { 1748 if (!destroyed) 1749 { 1750 Element.RefCounter.remove(element); 1751 Document.RefCounter.remove(docPayload); 1752 } 1753 1754 destroyed = true; 1755 } 1756 1757 @disable this(); 1758 1759 this(Document.DocumentPayload* docPayload, lxb_dom_element_t* e, bool recursive, bool returnAllElements) 1760 { 1761 this.docPayload = docPayload; 1762 element = e; 1763 1764 static if (order == VisitOrder.Reverse) current = cast(lxb_dom_element*)(e.node.last_child); 1765 else current = cast(lxb_dom_element*)(e.node.first_child); 1766 1767 this.recursive = recursive; 1768 this.returnAllElements = returnAllElements; 1769 1770 Document.RefCounter.add(docPayload); 1771 Element.RefCounter.add(e); 1772 1773 if (current != null && current.node.type != lxb_dom_node_type_t.LXB_DOM_NODE_TYPE_ELEMENT && returnAllElements == false) 1774 popFront(); 1775 1776 if (empty) 1777 unref(); 1778 } 1779 1780 bool destroyed = false; 1781 bool recursive; 1782 bool returnAllElements; 1783 1784 lxb_dom_element_t* current = null; 1785 1786 Document.DocumentPayload* docPayload; 1787 lxb_dom_element_t* element; 1788 } 1789 1790 1791 class SelectorElementRange 1792 { 1793 Element front() { return Element(docPayload, current); } 1794 1795 void popFront() 1796 { 1797 fiber.call(); 1798 1799 if (fiber.state == Fiber.State.TERM) 1800 current = null; 1801 1802 if (empty) 1803 unref(); 1804 } 1805 1806 @property bool empty() { return current is null; } 1807 1808 ~this() { 1809 unref(); 1810 } 1811 1812 private: 1813 1814 void unref() 1815 { 1816 if (!destroyed) 1817 { 1818 lxb_selectors_destroy(selectors,true); 1819 lxb_css_selector_list_destroy_memory(list); 1820 Element.RefCounter.remove(element); 1821 Document.RefCounter.remove(docPayload); 1822 } 1823 1824 destroyed = true; 1825 } 1826 1827 @disable this(); 1828 1829 1830 extern(C) static lxb_status_t find_callback(lxb_dom_node_t *node, void *spec, void *ctx) 1831 { 1832 auto current = (cast(lxb_dom_element_t**)ctx); 1833 *current = null; 1834 1835 if (cast(int)node.type == cast(int)lxb_dom_node_type_t.LXB_DOM_NODE_TYPE_ELEMENT) 1836 { 1837 *current = cast(lxb_dom_element_t*)node; 1838 Fiber.yield(); 1839 1840 if(*current == null) 1841 return lexbor_status_t.LXB_STATUS_ERROR; 1842 } 1843 1844 return lexbor_status_t.LXB_STATUS_OK; 1845 } 1846 1847 this(Document.DocumentPayload* docPayload, lxb_dom_element_t* e, string selector) 1848 { 1849 this.docPayload = docPayload; 1850 this.element = e; 1851 1852 selectors = lxb_selectors_create(); 1853 lxb_selectors_init(selectors); 1854 list = CallWithLexborString!lxb_css_selectors_parse(docPayload.parser, selector); 1855 1856 Document.RefCounter.add(docPayload); 1857 Element.RefCounter.add(element); 1858 1859 fiber = new CBFiber(); 1860 fiber.call(); 1861 1862 if (empty) 1863 unref(); 1864 } 1865 1866 class CBFiber : Fiber { 1867 1868 this() { 1869 super({lxb_selectors_find(selectors, &(element.node), list, &find_callback, ¤t);}); 1870 } 1871 1872 ~this() { 1873 current = null; 1874 if (state == Fiber.State.HOLD) 1875 call(); 1876 } 1877 } 1878 1879 bool destroyed = false; 1880 Fiber fiber = null; 1881 1882 lxb_dom_element_t* current = null; 1883 lxb_dom_element_t* element = null; 1884 1885 Document.DocumentPayload* docPayload; 1886 lxb_selectors_t* selectors; 1887 lxb_css_selector_list_t* list; 1888 1889 } 1890 1891 import std.range : isInputRange, ElementType; 1892 import std.traits : ReturnType; 1893 1894 /// Get the first element of a range or throw an exception 1895 auto frontOrThrow(T)(T range) 1896 if (isInputRange!T) 1897 { 1898 if (range.empty) throw new Exception("Range is empty."); 1899 else return range.front; 1900 } 1901 1902 /// Get the first element of a range or return the second args 1903 auto frontOr(T, El)(T range, El fallback) 1904 if (isInputRange!T && is(El == ElementType!T)) 1905 { 1906 if (range.empty) return fallback; 1907 else return range.front; 1908 } 1909 1910 /// Get the first element of a range or return Element.init 1911 auto frontOrInit(T)(T range) 1912 if (isInputRange!T) 1913 { 1914 if (range.empty) return (ElementType!T).init; 1915 else return range.front; 1916 } 1917 1918 unittest 1919 { 1920 import std.exception : assertThrown; 1921 1922 Document doc = Document(`<html><p><b>hello`); 1923 Element div = doc.createElement("div"); 1924 div.setAttribute("id", "test"); 1925 1926 assert(doc.bySelector("p b").frontOrThrow.name == "b"); 1927 assert(doc.bySelector("p b").frontOrInit.name == "b"); 1928 1929 assertThrown(doc.bySelector("p i").frontOrThrow); 1930 assert(doc.bySelector("p i").frontOrInit == Element()); 1931 assert(doc.bySelector("p i").frontOrInit == null); 1932 assert(doc.bySelector("p i").frontOr(div).name == "div"); 1933 assert(doc.bySelector("p i").frontOr(div).id == "test"); 1934 } 1935 1936 private struct FragmentString 1937 { 1938 string fragment; 1939 alias fragment this; 1940 } 1941 1942 auto asFragment(string s) 1943 { 1944 return cast(FragmentString)s; 1945 } 1946 1947 private auto CallWithLexborString(alias T, A...)(A params, string str) { return T(params, str.representation.ptr, str.representation.length); } 1948 1949 private string ReturnLexborString(alias T, A...)(A params) 1950 { 1951 size_t len; 1952 auto r = T(params, &len); 1953 return cast(string)r[0..len]; 1954 }