o
    MK&hX                     @   s8  d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ d
dlmZmZmZ ddlZddlmZ G dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%G dd deZ&dS )z#Tests of Beautiful Soup as a whole.    N)Iterable)BeautifulSoupGuessedAtParserWarningdammit)TreeBuilder)AttributeValueListXMLAttributeDictCommentPYTHON_SPECIFIC_ENCODINGSTagNavigableString)SoupStrainerParserRejectedMarkup)MarkupResemblesLocatorWarning   )default_builderLXML_PRESENTSoupTest)Typec                   @   sl   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
ejddddd gdd ZdS )TestConstructorc                 C   "   d}|  |}d|jjksJ d S )Nu   <h1>éé</h1>u   éésouph1stringselfdatar    r   F/var/www/html/venv/lib/python3.10/site-packages/bs4/tests/test_soup.pytest_short_unicode_input,      
z(TestConstructor.test_short_unicode_inputc                 C   r   )Nz<h1>foo bar</h1>zfoo barr   r   r   r   r    test_embedded_null1   r"   z"TestConstructor.test_embedded_nullc                 C   s,   d d}| j|dgd}d|jksJ d S )Nu   Räksmörgåsutf-8)exclude_encodingszwindows-1252)encoder   original_encoding)r   	utf8_datar   r   r   r    test_exclude_encodings6   s   
z&TestConstructor.test_exclude_encodingsc                 C   s  G dd dt }tddd}tjdd tdd|i|}W d    n1 s(w   Y  t|j|s5J tdd	|jjks@J d
|jjksHJ |di |}tjdd}td|dd}W d    n1 sgw   Y  t	|d j
}|dszJ ||jksJ ||jksJ d S )Nc                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Ze ZZ	dd Z
dS )z7TestConstructor.test_custom_builder_class.<locals>.Mockc                 [   s4   || _ d| _d| _g | _g | _i | _t| _t| _	d S )NTF)
called_withis_xmlstore_line_numberscdata_list_attributespreserve_whitespace_tagsstring_containersr   attribute_dict_classr   attribute_value_list_class)r   kwargsr   r   r    __init__?   s   
z@TestConstructor.test_custom_builder_class.<locals>.Mock.__init__c                 S      d S Nr   r   r   r   r   r    initialize_soupI      zGTestConstructor.test_custom_builder_class.<locals>.Mock.initialize_soupc                 S   s
   || _ d S r5   )fed)r   markupr   r   r    feedL   s   
z<TestConstructor.test_custom_builder_class.<locals>.Mock.feedc                 S   r4   r5   r   r   r   r   r    resetO   r8   z=TestConstructor.test_custom_builder_class.<locals>.Mock.resetc                 S   r4   r5   r   )r   ignorer   r   r    r>   R   r8   z>TestConstructor.test_custom_builder_class.<locals>.Mock.ignorec                    s    dV  d S )N)prepared markupzoriginal encodingzdeclared encodingzcontains replacement charactersr   r   argsr2   r   r   r    prepare_markupW   s   
zFTestConstructor.test_custom_builder_class.<locals>.Mock.prepare_markupN)__name__
__module____qualname__r3   r7   r;   r=   r>   set_up_substitutionscan_be_empty_elementrB   r   r   r   r    Mock>   s    
rH   valueT)varconvertEntitiesrecord builder)rJ   r?   )rO   ignored_valuer   zCKeyword arguments to the BeautifulSoup constructor will be ignored.)rN   r   )objectdictwarningscatch_warningsr   
isinstancerO   r*   r9   strmessage
startswith)r   rH   r2   r   rO   wmsgr   r   r    test_custom_builder_class;   s2   !z)TestConstructor.test_custom_builder_classc                 C   sd   G dd dt }dd }tt}td|d W d    n1 s"w   Y  dt|jv s0J d S )Nc                   @      e Zd Zdd ZdS )z:TestConstructor.test_parser_markup_rejection.<locals>.Mockc                 _   s   t d)NzNope.r   r@   r   r   r    r;      s   z?TestConstructor.test_parser_markup_rejection.<locals>.Mock.feedN)rC   rD   rE   r;   r   r   r   r    rH          rH   c                    s"    |d d dfV  |d d dfV  d S NFr   )r   r:   rA   r2   r   r   r    rB      s   zDTestConstructor.test_parser_markup_rejection.<locals>.prepare_markuprN   )rO   zoThe markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.)r   pytestraisesr   r   rV   rI   )r   rH   rB   exc_infor   r   r    test_parser_markup_rejection|   s   z,TestConstructor.test_parser_markup_rejectionc              	   C   s   d}|  |}|j}d|d ksJ ddg|d ksJ | j |td d}d|jd ks-J ddiddifD ]5}tjd	d
 | j |d |d}W d    n1 sPw   Y  |j}ddg|d ksbJ d|d ksjJ q5d S )Nz&<a id=" an id " class=" a class "></a>z an id idaclass)rO   multi_valued_attributesz	 a class *TrL   an)r   rd   r   rS   rT   )r   r:   r   rd   
switcheroor   r   r    test_cdata_list_attributes   s"   
z*TestConstructor.test_cdata_list_attributesc                    sn   G dd dt G dd dtG dd dt | jdt tt id}t fd	d
|jD s5J d S )Nc                   @      e Zd ZdS )z9TestConstructor.test_replacement_classes.<locals>.TagPlusNrC   rD   rE   r   r   r   r    TagPlus       rm   c                   @   rk   )z<TestConstructor.test_replacement_classes.<locals>.StringPlusNrl   r   r   r   r    
StringPlus   rn   ro   c                   @   rk   )z=TestConstructor.test_replacement_classes.<locals>.CommentPlusNrl   r   r   r   r    CommentPlus   rn   rp   z<a><b>foo</b>bar</a><!--whee-->)element_classesc                 3   s     | ]}t | fV  qd S r5   )rU   .0xrp   ro   rm   r   r    	<genexpr>   s    
z;TestConstructor.test_replacement_classes.<locals>.<genexpr>)r   r   r	   r   alldescendantsr6   r   ru   r    test_replacement_classes   s   z(TestConstructor.test_replacement_classesc                 C   s   G dd dt }G dd dt }| jd||dd}t|jjd t s%J t|jjd |s0J |jjD ]	}t||s=J q4g |jksEJ d S )	Nc                   @   rk   )zATestConstructor.test_alternate_string_containers.<locals>.PStringNrl   r   r   r   r    PString   rn   rz   c                   @   rk   )zATestConstructor.test_alternate_string_containers.<locals>.BStringNrl   r   r   r   r    BString   rn   r{   z4<div>Hello.<p>Here is <b>some <i>bolded</i></b> text)bp)r/   r   )	r   r   rU   divcontentsr}   r|   stringsstring_container_stack)r   rz   r{   r   sr   r   r     test_alternate_string_containers   s   	z0TestConstructor.test_alternate_string_containers
bad_markupr   Fc                 C   s   dS r^   r   )rt   r   r   r    <lambda>   s    zTestConstructor.<lambda>c                 C   sR   t t}t|d W d    n1 sw   Y  d|dt|jv s'J d S )Nhtml.parserz'Incoming markup is of an invalid type: z?. Markup must be a string, a bytestring, or an open filehandle.)r_   r`   	TypeErrorr   rV   rI   )r   r   ra   r   r   r    test_invalid_markup_type   s   
z(TestConstructor.test_invalid_markup_typeN)rC   rD   rE   r!   r#   r)   r[   rb   rj   ry   r   r_   markparametrizer   r   r   r   r    r   +   s    A!r   c                   @   sT   e Zd Zejdddgdd Zejddd eD d	g d
d Zdd Z	d	S )
TestOutputz!eventual_encoding,actual_encoding)r$   r$   )utf-16r   c                 C   s0   |  d}d|_d| d|j|dksJ d S )N<tag></tag>Tz<?xml version="1.0" encoding="z"?>
<tag></tag>eventual_encoding)r   r+   decode)r   r   actual_encodingr   r   r   r    test_decode_xml_declaration   s
   


z&TestOutput.test_decode_xml_declarationr   c                 C   s   g | ]}|qS r   r   rr   r   r   r    
<listcomp>  s    zTestOutput.<listcomp>Nc                 C   s(   t dd}d|_d|j|dksJ d S )Nr   r   Tz!<?xml version="1.0"?>
<tag></tag>r   )r   r+   r   )r   r   r   r   r   r    Mtest_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding
  s
   
	zXTestOutput.test_decode_xml_declaration_with_missing_or_python_internal_eventual_encodingc                 C   sV   |  d}d|jddksJ d|jddksJ d| ks!J d| ks)J d S )Nr   s   <tag></tag>r$   )encodingz<tag>
</tag>
)r   r&   encode_contentsdecode_contentsprettifyr6   r   r   r    test  s
   
zTestOutput.test)
rC   rD   rE   r_   r   r   r   r
   r   r   r   r   r   r    r      s    

r   c                   @   s   e Zd Zdeej dee dejfddZdeej ddfdd	Z	d
d Z
dd Zdd Zdd Zdd Zdd Zdd Zejdg ddd Zejdg ddd Zdd  Zd!d" Zd#d$ Zd%d& ZdS )'TestWarningsrS   clsreturnc                 C   s<   |D ]}t |j|r|jtksJ |  S qtd||f )Nz%s warning not found in %r)rU   rW   filename__file__	Exception)r   rS   r   rY   r   r   r    _assert_warning,  s   zTestWarnings._assert_warningrY   Nc                 C   s2   |  |t}t|j}|tjd d sJ d S )N<   )r   r   rV   rW   rX   MESSAGE)r   rY   warningrW   r   r   r    _assert_no_parser_specified5  s   
z(TestWarnings._assert_no_parser_specifiedc                 C   sB   t jdd}td W d    n1 sw   Y  | | d S NTrL   <a><b></b></a>rS   rT   r   r   r   rY   r   r   r    #test_warning_if_no_parser_specified:  s   
z0TestWarnings.test_warning_if_no_parser_specifiedc                 C   sD   t jdd}tdd W d    n1 sw   Y  | | d S )NTrL   r   htmlr   r   r   r   r    *test_warning_if_parser_specified_too_vague?  s   z7TestWarnings.test_warning_if_parser_specified_too_vaguec                 C   sF   t jdd}| d W d    n1 sw   Y  g |ks!J d S r   rS   rT   r   r   r   r   r    ,test_no_warning_if_explicit_parser_specifiedD  s   z9TestWarnings.test_no_warning_if_explicit_parser_specifiedc                 C   sn   t ddd}tjdd}| jd|d W d    n1 sw   Y  | |t}t|j}|ds5J d S )	Nrd   r|   )namer   TrL   r   
parse_onlyz7The given value for parse_only will exclude everything:)	r   rS   rT   r   r   UserWarningrV   rW   rX   )r   strainerrY   r   rZ   r   r   r    +test_warning_if_strainer_filters_everythingI  s   
z8TestWarnings.test_warning_if_strainer_filters_everythingc                 C   s   t jdd}tddtdd}W d    n1 sw   Y  | |t}t|j}d|v s0J d|v s6J d	| ks>J d S )
NTrL   r   r   r|   )parseOnlyTheser   r   s   <b></b>)	rS   rT   r   r   r   DeprecationWarningrV   rW   r&   )r   rY   r   r   rZ   r   r   r    )test_parseOnlyThese_renamed_to_parse_onlyQ  s   
z6TestWarnings.test_parseOnlyThese_renamed_to_parse_onlyc                 C   s~   t jdd}d}t|ddd}W d    n1 sw   Y  | |t}t|j}d|v s0J d|v s6J d|jks=J d S )	NTrL   s   ér   utf8)fromEncodingr   from_encoding)rS   rT   r   r   r   rV   rW   r'   )r   rY   r   r   r   rZ   r   r   r    *test_fromEncoding_renamed_to_from_encoding^  s   
z7TestWarnings.test_fromEncoding_renamed_to_from_encodingc                 C   s>   t t | jddd W d    d S 1 sw   Y  d S )Nz<a>T)no_such_argument)r_   r`   r   r   r<   r   r   r    "test_unrecognized_keyword_argumenth  s   "z/TestWarnings.test_unrecognized_keyword_argumentr:   )zmarkup.htmlz
markup.htmzmarkup.HTMLz
markup.txtzmarkup.xhtmlz
markup.xmlz/home/user/file.txtz/c:\user\file.html\\server\share\path\file.XhTmlc                 C   sZ   t jdd}t|d | |t}dt|jv sJ W d    d S 1 s&w   Y  d S )NTrL   r   zlooks more like a filenamerS   rT   r   r   r   rV   rW   )r   r:   rY   r   r   r   r    test_resembles_filename_warningl  s
   
"z,TestWarnings.test_resembles_filename_warning)r   
markuphtmlz
markup.comrN   z	markup.jsz
markup.jpgzmarkup.markupz/home/user/filez)c:\user\file.html\\server\share\path\filezAlog message containing a url http://www.url.com/ right there.htmlztwo  consecutive  spaces.htmlztwo//consecutive//slashes.htmlz4looks/like/a/filename/but/oops/theres/a#comment.htmlztwo
lines.htmlzcontains?.htmlzcontains*.htmlzcontains#.htmlzcontains&.htmlzcontains;.htmlzcontains>.htmlzcontains<.htmlzcontains$.htmlzcontains|.htmlzcontains:.htmlz:-at-the-front.htmlc                 C   sF   t jdd}| | W d    n1 sw   Y  g |ks!J d S )NTrL   r   )r   r:   rY   r   r   r    "test_resembles_filename_no_warning  s   (z/TestWarnings.test_resembles_filename_no_warningc                 C   st   d}t jdd}t|d W d    n1 sw   Y  | |t}dt|jv s,J |t|jdvs8J d S )Ns   http://www.crummybytes.com/TrL   r   looks more like a URLr   )rS   rT   r   r   r   rV   rW   r&   r   urlwarning_listr   r   r   r    test_url_warning_with_bytes_url  s   z,TestWarnings.test_url_warning_with_bytes_urlc                 C   sn   d}t jdd}t|d W d    n1 sw   Y  | |t}dt|jv s,J |t|jvs5J d S )Nzhttp://www.crummyunicode.com/TrL   r   r   r   r   r   r   r    !test_url_warning_with_unicode_url  s   z.TestWarnings.test_url_warning_with_unicode_urlc                 C   P   t jdd}| d W d    n1 sw   Y  tdd |D r&J d S )NTrL   s$   http://www.crummybytes.com/ is greatc                 s       | ]
}d t |jv V  qdS r   NrV   rW   rs   rY   r   r   r    rv         zETestWarnings.test_url_warning_with_bytes_and_space.<locals>.<genexpr>rS   rT   r   anyr   r   r   r   r    %test_url_warning_with_bytes_and_space  s   z2TestWarnings.test_url_warning_with_bytes_and_spacec                 C   r   )NTrL   z&http://www.crummyunicode.com/ is greatc                 s   r   r   r   r   r   r   r    rv     r   zGTestWarnings.test_url_warning_with_unicode_and_space.<locals>.<genexpr>r   r   r   r   r    'test_url_warning_with_unicode_and_space  s   z4TestWarnings.test_url_warning_with_unicode_and_space)rC   rD   rE   r   rS   WarningMessager   Warningr   r   r   r   r   r   r   r   r   r_   r   r   r   r   r   r   r   r   r   r   r   r    r   $  s<    
	


$
r   c                   @   r\   )TestSelectiveParsingc                 C   s.   d}t d}| j||d}| dksJ d S )Nz&No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>r|   r   s   <b>Yes</b><b>Yes <c>Yes</c></b>)r   r   r&   )r   r:   r   r   r   r   r    test_parse_with_soupstrainer  s   z1TestSelectiveParsing.test_parse_with_soupstrainerN)rC   rD   rE   r   r   r   r   r    r     r]   r   c                   @   s:   e Zd ZdZdd Zejje dddd Z	dd	 Z
d
S )
TestNewTagz(Test the BeautifulSoup.new_tag() method.c                 C   s   |  d}|jdddddid}t|tsJ d|jksJ |jdks%J tddd|jks0J d |ju s7J |d}d |ju sCJ |jdd d	}d |ju sQJ |jddd	}d|jks_J d S )
NrN   footxtbazr   za name)r   barattrs)r   r   )r   )	r   new_tagrU   r   r   r   rR   r   parent)r   r   r   r   r   r    test_new_tag  s   

zTestNewTag.test_new_tagz-lxml not installed, cannot parse XML document)reasonc                 C   B   t dd}|d}|d}d| ksJ d| ksJ d S )NrN   xmlbrr}      <br/>s   <p/>r   r   r&   )r   xml_soupxml_brxml_pr   r   r    5test_xml_tag_inherits_self_closing_rules_from_builder  s
   


z@TestNewTag.test_xml_tag_inherits_self_closing_rules_from_builderc                 C   r   )NrN   r   r   r}   r   s   <p></p>r   )r   	html_souphtml_brhtml_pr   r   r    1test_tag_inherits_self_closing_rules_from_builder  s
   


z<TestNewTag.test_tag_inherits_self_closing_rules_from_builderN)rC   rD   rE   __doc__r   r_   r   skipifr   r   r   r   r   r   r    r     s    

r   c                   @   s    e Zd ZdZdd Zdd ZdS )TestNewStringz+Test the BeautifulSoup.new_string() method.c                 C   s2   |  d}|d}d|ksJ t|tsJ d S NrN   r   )r   
new_stringrU   r   r   r   r   r   r   r    'test_new_string_creates_navigablestring  s   

z5TestNewString.test_new_string_creates_navigablestringc                 C   s4   |  d}|dt}d|ksJ t|tsJ d S r   )r   r   r	   rU   r   r   r   r    3test_new_string_can_create_navigablestring_subclass  s   
zATestNewString.test_new_string_can_create_navigablestring_subclassN)rC   rD   rE   r   r   r   r   r   r   r    r     s    r   c                   @   s   e Zd Zdd Zdd ZdS )
TestPicklec                 C   s2   |  d}t|}t|}d|jjksJ d S )Nz<a>some markup</a>some markup)r   pickledumpsloadsrd   r   r   r   pickled	unpickledr   r   r    test_normal_pickle  s   


zTestPickle.test_normal_picklec                 C   s6   |  d}d |_t|}t|}d|jksJ d S )Nr   )r   rO   r   r   r   r   r   r   r   r    test_pickle_with_no_builder  s
   


z&TestPickle.test_pickle_with_no_builderN)rC   rD   rE   r   r   r   r   r   r    r     s    r   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )TestEncodingConversionc                 C   s&   d| _ | j d| _| jdksJ d S )NuU   <html><head><meta charset="utf-8"/></head><body><foo>Sacré bleu!</foo></body></html>r$   sU   <html><head><meta charset="utf-8"/></head><body><foo>Sacré bleu!</foo></body></html>)unicode_datar&   r(   r<   r   r   r    setup_method)  s
   z#TestEncodingConversion.setup_methodc              	   C   s   t j}ttj z9dd }|t _d}| |}| }t|ts#J || 	| ks.J |j
 dks7J W ttj |t _d S ttj |t _w )Nc                 S   r4   r5   r   )rV   r   r   r    noop9  r8   z>TestEncodingConversion.test_ascii_in_unicode_out.<locals>.noops   <foo>a</foo>r$   )r   _chardet_dammitloggingdisableWARNINGr   r   rU   rV   document_forr'   lowerNOTSET)r   chardetr  asciisoup_from_asciiunicode_outputr   r   r    test_ascii_in_unicode_out2  s   

z0TestEncodingConversion.test_ascii_in_unicode_outc                 C   s@   |  | j}| | jksJ |jjdksJ |jd u sJ d S Nu   Sacré bleu!)r   r   r   r   r   r'   r   soup_from_unicoder   r   r    test_unicode_in_unicode_outH  s   z2TestEncodingConversion.test_unicode_in_unicode_outc                 C   s2   |  | j}| | jksJ |jjdksJ d S r  )r   r(   r   r   r   r   )r   soup_from_utf8r   r   r    test_utf8_in_unicode_outP  s   z/TestEncodingConversion.test_utf8_in_unicode_outc                 C   s$   |  | j}|d| jksJ d S )Nr$   )r   r   r&   r(   r  r   r   r    test_utf8_outW  s   z$TestEncodingConversion.test_utf8_outN)rC   rD   rE   r  r  r  r  r  r   r   r   r    r   %  s    	r   )'r   r  r   r_   typingr   bs4r   r   r   bs4.builderr   bs4.elementr   r   r	   r
   r   r   
bs4.filterr   bs4.exceptionsr   bs4._warningsr   rN   r   r   r   rS   r   r   r   r   r   r   r   r   r   r   r   r   r    <module>   s0     L. ).