o
    MK&h>Y                     @   sJ  d Z dgZddlmZmZmZmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZmZ ddlZddlmZmZmZmZmZ ddlmZmZmZ ddl Z dd	l!m"Z" dd
lm#Z#m$Z$m%Z%m&Z& e	rkddl'm(Z( ddl)m*Z+ G dd deZ,G dd de+j-Z.G dd de/Z0G dd de+j1Z2G dd de2Z3G dd de2Z4dS )MITHTML5TreeBuilder    )	AnycastDictIterableOptionalSequenceTYPE_CHECKINGTupleUnion)	TypeAlias)_AttributeValue_AttributeValues	_Encoding
_Encodings_NamespaceURL
_RawMarkupN)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributePageElementnonwhitespace_re)
namespaces)CommentDoctypeNavigableStringTagBeautifulSoup)basec                   @   s   e Zd ZU dZdZeed< eeee	gZ
ee ed< dZeed< ded< ee ed	< 	
	
	
dded	ee dee dee deeeee ee ef  f
ddZdedd
fddZdeddfddZdedefddZd
S )r   aj  Use `html5lib <https://github.com/html5lib/html5lib-python>`_ to
    build a tree.

    Note that `HTML5TreeBuilder` does not support some common HTML
    `TreeBuilder` features. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    Specifically:

    * This `TreeBuilder` doesn't use different subclasses of
      `NavigableString` (e.g. `Script`) based on the name of the tag
      in which the string was found.
    * You can't use a `SoupStrainer` to parse only part of a document.
    html5libNAMEfeaturesTTRACKS_LINE_NUMBERSTreeBuilderForHtml5libunderlying_builderuser_specified_encodingNmarkupdocument_declared_encodingexclude_encodingsreturnc                 c   sb    || _ |df|dffD ]\}}|r tjd| d| ddd qtj|dd |d d dfV  d S )	Nr,   r-   zYou provided a value for z0, but the html5lib tree builder doesn't support .   
stacklevelF)r*   warningswarnr   warn_if_markup_looks_like_xml)selfr+   r*   r,   r-   variablename r9   H/var/www/html/venv/lib/python3.10/site-packages/bs4/builder/_html5lib.pyprepare_markupW   s   zHTML5TreeBuilder.prepare_markupc                 C   s   | j dur| j jdurtjddd tj| jd}| jdus J || j_t	 }t
|ts1| j|d< |j|fi |}t
|trCd|_n|jjjd }|j}||_d| j_dS )zRun some incoming markup through some parsing process,
        populating the `BeautifulSoup` object in `HTML5TreeBuilder.soup`.
        NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   r1   )treeoverride_encodingr   )soup
parse_onlyr3   r4   r$   
HTMLParsercreate_treebuilderr)   parserdict
isinstancestrr*   parseoriginal_encoding	tokenizerstreamcharEncodingr8   )r6   r+   rC   extra_kwargsdocrH   r9   r9   r:   feedu   s$   


zHTML5TreeBuilder.feednamespaceHTMLElementsc                 C   s   t || j| jd| _| jS )zCalled by html5lib to instantiate the kind of class it
        calls a 'TreeBuilder'.

        :param namespaceHTMLElements: Whether or not to namespace HTML elements.

        :meta private:
        )store_line_numbers)r(   r?   rP   r)   )r6   rO   r9   r9   r:   rB      s   

z#HTML5TreeBuilder.create_treebuilderfragmentc                 C   s   d| S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r9   )r6   rQ   r9   r9   r:   test_fragment_to_document   s   z*HTML5TreeBuilder.test_fragment_to_document)NNN)__name__
__module____qualname____doc__r%   rF   __annotations__r   r   r   r&   r	   r'   boolr   r   r   r   r   r   r;   rN   rB   rR   r9   r9   r9   r:   r   ;   s8   
 
%
c                	       s   e Zd ZU ded< eej ed< 		d&deded dedef fd	d
Z	d'ddZ
deeef ddfddZdededdfddZdeddfddZd'ddZd'ddZd(dd Zd)d!d"Zd#ddefd$d%Z  ZS )*r(   r"   r?   rC   NTrO   rP   kwargsc                    s^   |r|| _ ntjdtdd ddlm} |	d	d|i|| _ tt| | d | _	|| _
d S )
NaK  The optionality of the 'soup' argument to the TreeBuilderForHtml5lib constructor is deprecated as of Beautiful Soup 4.13.0: 'soup' is now required. If you can't pass in a BeautifulSoup object here, or you get this warning and it seems mysterious to you, please contact the Beautiful Soup developer team for possible un-deprecation.   r1   r   r!    html.parserrP   )r[   r\   )r?   r3   r4   DeprecationWarningbs4r"   superr(   __init__rC   rP   )r6   rO   r?   rP   rY   r"   	__class__r9   r:   r`      s$   
zTreeBuilderForHtml5lib.__init__r.   Elementc                 C   s   | j   t| j | j d S N)r?   resetrc   r6   r9   r9   r:   documentClass   s   
z$TreeBuilderForHtml5lib.documentClasstokenc                 C   sP   t t|d }t tt |d }t tt |d }t|||}| j| d S )Nr8   publicIdsystemId)r   rF   r   r   for_name_and_idsr?   object_was_parsed)r6   rh   r8   ri   rj   doctyper9   r9   r:   insertDoctype   s
   z$TreeBuilderForHtml5lib.insertDoctyper8   	namespacec                 C   s`   d }d }| j d ur| jr| j jj \}}|d usJ |d }| jj||||d}t|| j|S )N   )
sourceline	sourcepos)rC   rP   rI   rJ   positionr?   new_tagrc   )r6   r8   ro   rq   rr   tagr9   r9   r:   elementClass   s   z#TreeBuilderForHtml5lib.elementClassdataTextNodec                 C   s   t t|| jS rd   )rx   r   r?   )r6   rw   r9   r9   r:   commentClass      z#TreeBuilderForHtml5lib.commentClassc                 C      t  )zThis is only used by html5lib HTMLParser.parseFragment(),
        which is never used by Beautiful Soup, only by the html5lib
        unit tests. Since we don't currently hook into those tests,
        the implementation is left blank.
        NotImplementedErrorrf   r9   r9   r:   fragmentClass   s   z$TreeBuilderForHtml5lib.fragmentClassc                 C   r{   zThis is only used by the html5lib unit tests. Since we
        don't currently hook into those tests, the implementation is
        left blank.
        r|   rf   r9   r9   r:   getFragment      z"TreeBuilderForHtml5lib.getFragmentnodec                 C   s   | j |j d S rd   )r?   appendelementr6   r   r9   r9   r:   appendChild  s   z"TreeBuilderForHtml5lib.appendChildc                 C   s   | j S rd   )r?   rf   r9   r9   r:   getDocument
     z"TreeBuilderForHtml5lib.getDocumentr   c                 C   r{   r   r|   r6   r   r9   r9   r:   testSerializer  r   z%TreeBuilderForHtml5lib.testSerializer)NT)r.   rc   r   rc   r.   N)r.   r"   )rS   rT   rU   rW   r   r$   rA   rX   r   r`   rg   r   rF   rn   rv   ry   r~   r   r   r   r   __classcell__r9   r9   ra   r:   r(      s.   
 
!



	r(   c                   @   s   e Zd ZU dZeed< eed< defddZdee	e
ef  fddZd	e
d
eddfddZdee	e
ef  fddZdee
 fddZdefddZd	e
defddZd	e
defddZdS )AttrListz@Represents a Tag's attributes in a way compatible with html5lib.r   attrsc                 C   s   || _ t| j j| _d S rd   )r   rD   r   r   r9   r9   r:   r`     s   zAttrList.__init__r.   c                 C   s   t | j  S rd   )listr   items__iter__rf   r9   r9   r:   r        zAttrList.__iter__r8   valueNc                 C   st   | j jpi }||dg v s| j j|v r3||| j jg v r3t|ts3t|ts*J | j t	|}|| j |< d S )N*)
r   cdata_list_attributesgetr8   rE   r   rF   attribute_value_list_classr   findall)r6   r8   r   	list_attrr9   r9   r:   __setitem__"  s   
zAttrList.__setitem__c                 C      t | j S rd   )r   r   r   rf   r9   r9   r:   r   3     zAttrList.itemsc                 C   r   rd   r   r   keysrf   r9   r9   r:   r   6  r   zAttrList.keysc                 C   s
   t | jS rd   )lenr   rf   r9   r9   r:   __len__9     
zAttrList.__len__c                 C   s
   | j | S rd   )r   r6   r8   r9   r9   r:   __getitem__<  r   zAttrList.__getitem__c                 C   s   |t | j v S rd   r   r   r9   r9   r:   __contains__?  r   zAttrList.__contains__)rS   rT   rU   rV   r    rW   r   r`   r   r   rF   r   r   r   r   r   intr   r   rX   r   r9   r9   r9   r:   r     s   
 r   c                   @   sL   e Zd ZU eed< ded< ee ed< edefddZ	de
jfdd	Zd
S )BeautifulSoupNoder   r"   r?   ro   r.   c                 C   r{   )zReturn the html5lib constant corresponding to the type of
        the underlying DOM object.

        NOTE: This property is only accessed by the html5lib test
        suite, not by Beautiful Soup proper.
        r|   rf   r9   r9   r:   nodeTypeH  s   zBeautifulSoupNode.nodeTypec                 C   r{   rd   r|   rf   r9   r9   r:   	cloneNodeT  r   zBeautifulSoupNode.cloneNodeN)rS   rT   rU   r   rW   r   r   propertyr   r   treebuilder_baseNoder   r9   r9   r9   r:   r   C  s   
 r   c                   @   s,  e Zd ZU eed< ee ed< dedddee fddZd&ddZd	e	fddZ
eeeeef f Zeed< eeef Zeed< dee d	d
fddZee
eZ	
d'deded d	d
fddZ							
d(ddZd)ddZd*ddZd	efd d!Zd	ejfd"d#Zd	eee ef fd$d%ZeeZd
S )+rc   r   ro   r?   r"   c                 C   s&   t j| |j || _|| _|| _d S rd   )r   r   r`   r8   r   r?   ro   )r6   r   r?   ro   r9   r9   r:   r`   \  s   
zElement.__init__r   r   r.   Nc                 C   s  d }t |jtu r|j }}n|j}| |_|d ur(|jd ur(t|ts(|j  |d urS| jjrSt | jjd tu rS| jjd }| j	|| }|
| || j_d S t|tr^| j	|}| jjri| jd}n| jjd uru| j }n| j}| jj|| j|d d S )NF)parentmost_recent_element)typer   r   r   rE   rF   extractcontentsr?   
new_stringreplace_with_most_recent_element_last_descendantnext_elementrl   )r6   r   string_childchildold_elementnew_elementr   r9   r9   r:   r   d  s8   




zElement.appendChildc                 C   s   t | jtri S t| jS rd   )rE   r   r   r   rf   r9   r9   r:   getAttributes  s   
zElement.getAttributes_Html5libAttributeName_Html5libAttributes
attributesc                 C   s   |d urMt |dkrOt| D ]\}}t|tr$t| }||= |||< qtt|}| jj	
| j| t| D ]	\}}|| j|< q9| jj	| j d S d S d S Nr   )r   r   r   rE   tupler   r   r   r?   builder$_replace_cdata_list_attribute_valuesr8   r   set_up_substitutions)r6   r   r8   r   new_namenormalized_attributesvalue_or_valuesr9   r9   r:   setAttributes  s   

zElement.setAttributesrw   insertBeforec                 C   s6   t | j|| j}|r| || d S | | d S rd   )rx   r?   r   r   r   )r6   rw   r   textr9   r9   r:   
insertText  s   zElement.insertTextrefNodec                 C   s   | j |j }t|j tu r>| j jr>t| j j|d  tu r>| j j|d  }t|tu s.J | j||j  }|| d S | j ||j  | |_	d S )Nrp   )
r   indexr   r   r   r?   r   r   insertr   )r6   r   r   r   old_nodenew_strr9   r9   r:   r     s   
zElement.insertBeforec                 C   s   |j   d S rd   )r   r   r   r9   r9   r:   removeChild  r   zElement.removeChild
new_parentc                 C   s  | j }|j }|j}|dd}t|jdkr%|dusJ |jd }|j}nd}|j}|j}t|dkrs|d }	|dur?||	_n||	_||	_|durM|	|_n|	|_|durW|	|_|d jddd}
|
dusfJ ||
_|durp|
|_d|
_|D ]}||_|j	| qug |_||_dS )z1Move all of this tag's children into another tag.Fr   Nr   T)is_initializedaccept_self)
r   next_siblingr   r   r   r   previous_elementprevious_siblingr   r   )r6   r   r   new_parent_elementfinal_next_elementnew_parents_last_descendantnew_parents_last_child(new_parents_last_descendant_next_element	to_appendfirst_childlast_childs_last_descendantr   r9   r9   r:   reparentChildren  sL   

zElement.reparentChildrenc                 C   s   t | jjdkS r   )r   r   r   rf   r9   r9   r:   
hasContent7  rz   zElement.hasContentc                 C   sB   | j | jj| j}t|| j | j}| jD ]	\}}||j|< q|S rd   )r?   rt   r   r8   ro   rc   r   )r6   ru   r   keyr   r9   r9   r:   r   <  s
   zElement.cloneNodec                 C   s$   | j d u rtd | jfS | j | jfS )Nhtml)ro   r   r8   rf   r9   r9   r:   getNameTupleC  s   
zElement.getNameTuple)r   r   r.   Nrd   )r   r   r   r   r.   Nr   )r   rc   r.   N) rS   rT   rU   r    rW   r   r   r`   r   r   r   r   rF   r   r   r   r   r   r   r   r   r   r   r   r   rX   r   r   r   r   r   	nameTupler9   r9   r9   r:   rc   X  sJ   
 

3
 
	


Trc   c                   @   s(   e Zd ZU eed< deddfddZdS )rx   r   r?   r"   c                 C   s   t j| d  || _|| _d S rd   )r   r   r`   r   r?   )r6   r   r?   r9   r9   r:   r`   O  s   
zTextNode.__init__N)rS   rT   rU   r   rW   r`   r9   r9   r9   r:   rx   L  s   
 rx   )5__license____all__typingr   r   r   r   r   r	   r
   r   r   typing_extensionsr   bs4._typingr   r   r   r   r   r   r3   bs4.builderr   r   r   r   r   bs4.elementr   r   r   r$   html5lib.constantsr   r   r   r   r    r^   r"   html5lib.treebuildersr#   r   r   TreeBuilderr(   objectr   r   r   rc   rx   r9   r9   r9   r:   <module>   s,   , 	sg. u