sourceafSizzle::SizzleDoc.fan

using xml

// TODO: Make SizzleDoc const - we shouldn't need to cache the single element buckets - which could be simplified anyway

** Holds a representation of an XML document that may be queried with CSS selectors. 
** 
** 'SizzleDoc' is intended for re-use with multiple CSS selections:
**
**    doc    := SizzleDoc("""<html><p class="welcome">Hello from Sizzle!</p></html>""")
**    elems1 := doc.select("p.welcome")
**    elems2 := doc.select("html p")
** 
class SizzleDoc {
    
    private static const Str    selectorStr     := Str<| \s+(\w1+)?(#\w1+)?(\.[\w2\.]+)?((?:\[[^\]]+\])+)?(?:\s*([>+]))? |>.trim.replace("\\w1", Str<| [^#\.\s\[\]\<\+] |>.trim).replace("\\w2", Str<| [^#\s\[\]\<\+] |>.trim)
    private static const Regex  selectorRegex   := Regex.fromStr(selectorStr)
    
    private Str:DomBucket   buckets := Str:DomBucket[:] { caseInsensitive = true }
    private XElem           root
    private DomBucket       rootBucket
    
    private new make(XElem elem) {
        this.root = elem
        this.rootBucket = DomBucket(elem, true)
    }

    ** Create a 'SizzleDoc' from an XML string.
    static new fromStr(Str xml) {
        fromXDoc(XParser(xml.in).parseDoc)
    }

    ** Create a 'SizzleDoc' from an XML document.
    static new fromXDoc(XDoc doc) {
        fromXElem(doc.root)
    }

    ** Create a 'SizzleDoc' from an XML element.
    static new fromXElem(XElem elem) {
        SizzleDoc.make(elem)
    }

    ** Queries the xml document with the given CSS selector any returns any matching elements.
    @Operator
    XElem[] get(Str cssSelector) {
        matcher := selectorRegex.matcher(" " + cssSelector)
        
        selectors := Selector[,]
        while (matcher.find) {
            selectors.add(Selector(matcher))
        }       

        if (selectors.isEmpty)
            throw SizzleErr(ErrMsgs.selectorNotValid(cssSelector))
        
        got := rootBucket.select(selectors.last)
        
        if (selectors.size == 1)
            return got

        Env.cur.err.printLine(selectors)
        
        got = got.findAll |element| {
            elem := (XNode?) element
            matches := true
            selectors[0..<-1].eachr |sel| {
                elem = findElemMatch(elem, sel)
                Env.cur.err.printLine(elem)
                if (!isElement(elem)) {
                    matches = false
                    return
                }
            }
            return matches
        }
        
        return got
    }
    
    ** An alias for 'get()'
    XElem[] select(Str selector) {
        get(selector)
    }
    
    private XNode? findElemMatch(XNode? elem, Selector selector) {
        if (selector.combinator == Combinator.descendant) {
            elem = elem?.parent
            while (isElement(elem)) {
                bucket := buckets.getOrAdd(pathTo(elem)) { DomBucket(elem, false) }
                matches := bucket.select(selector)
                if (matches.size == 1)
                    return matches.first
                elem = elem.parent
            }
            return null
        }
        
        if (selector.combinator == Combinator.child) {
            elem = elem?.parent
            if (!isElement(elem))
                return null
            bucket := buckets.getOrAdd(pathTo(elem)) { DomBucket(elem, false) }
            matches := bucket.select(selector)
            if (matches.size == 1)
                return matches.first
            return null
        }

        if (selector.combinator == Combinator.sibling) {
            parent := elem?.parent
            if (!isElement(parent))
                return null
            index := (parent as XElem).elems.indexSame(elem) - 1
            if (index < 1)
                return null
            elem = (parent as XElem).elems.getSafe(index-1)
            if (!isElement(elem))
                return null         
            bucket := buckets.getOrAdd(pathTo(elem)) { DomBucket(elem, false) }
            matches := bucket.select(selector)
            if (matches.size == 1)
                return matches.first
            return null
        }
        
        return null
    }
    
    internal static Str pathTo(XElem elem, StrBuf path := StrBuf()) {
        name := "/${elem.name}"
        if (isElement(elem.parent)) {
            sibs := ((XElem) elem.parent).elems.findAll { it.name == elem.name }
            if (sibs.size > 1)
                name += "[" + sibs.indexSame(elem).toStr + "]"
            pathTo(elem.parent, path)
        }
        path.add(name)
        return path.toStr
    }

    internal static XElem[] path(XElem elem, XElem[] elemPath := XElem[,]) {
        if (isElement(elem.parent)) {
            path(elem.parent, elemPath)
        }
        return elemPath.add(elem)
    }
    
    private static Bool isElement(XNode? node) {
        (node as XElem) != null
    }
}


internal class DomBucket {
    
    ElemBucket      typeBucket  := ElemBucket() 
    ElemBucket      classBucket := ElemBucket() 
    Str:ElemBucket  attrBuckets := Str:ElemBucket[:] { caseInsensitive = true }
    
    new make(XElem elem, Bool recurse) {
        walk(elem, recurse)
    }
    
    private Void walk(XElem elem, Bool recurse) {
        typeBucket[elem.name] = elem
        
        elem.attr("class", false)?.val?.split?.each {
            classBucket[it] = elem
        }

        elem.attrs.each {
            bucket := attrBuckets.getOrAdd(it.name) { ElemBucket() }
            bucket[it.val.trim] = elem
        }
        
        if (recurse)
            elem.elems.each { walk(it, recurse) }
    }

    XElem[] select(Selector selector) {
        types   := (selector.type == "*") ? typeBucket.all : typeBucket[selector.type]
        gotten  := types
        
        if (!selector.id.isEmpty) {
            ids     := attrBuckets["id"]?.get(selector.id)
            gotten  = gotten.intersection(ids)
        }

        selector.classes.each {
            css := classBucket[it]
            gotten  = gotten.intersection(css)
        }

        return gotten
    }
}

internal class ElemBucket {
    Str:XElem[] elems   := [:]

    @Operator
    XElem[] get(Str? name) {
        elems[name ?: Str.defVal] ?: XElem#.emptyList
    }

    @Operator
    Void set(Str name, XElem elem) {
        elems.getOrAdd(name) { XElem[,] }.add(elem)
    }
    
    once XElem[] all() {
        elems.vals.flatten.unique
    }
}

internal const class Selector {
    
    const Str   type
    const Str   id
    const Str[] classes
    const Str   attrs
    const Combinator    combinator
    
    new make(RegexMatcher matcher) {
        this.type       = matcher.group(1) ?: "*"
        this.id         = matcher.group(2)?.getRange(1..-1) ?: ""
        this.classes    = matcher.group(3)?.split('.')?.exclude { it.isEmpty } ?: Str#.emptyList
        this.attrs      = matcher.group(4) ?: ""
        this.combinator = Combinator.fromCombinator(matcher.group(5) ?: "")
    }
    
    override Str toStr() {
        sId := id.isEmpty ? "" : "#${id}"
        sClasses := classes.isEmpty ? "" : "." + classes.join(".") 
        return "${type}${sId}${sClasses}"
    }
}

internal enum class Combinator {
    descendant  (""),
    child       (">"),
    sibling     ("+");
    
    const Str combinator;
    
    private new make(Str combinator) {
        this.combinator = combinator
    }
    
    static Combinator fromCombinator(Str combinator) {
        Combinator.vals.find { it.combinator == combinator } ?: throw Err("Combinator '${combinator}' not known")
    }
}