o
    MK&h                  	   @   s  U d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZ er?dd	lmZ ddlZddlZddlZddlZddlZddlZd/ddZd0d
ddededdfddZG dd deZd
eddfddZ dZ!ee"d< dZ#ee"d< d1de$defddZ%d2de$defd!d"Z&d3d$e$defd%d&Z'd4d$e$ddfd(d)Z(d5d$e$d+eddfd,d-Z)e*d.kreej+,  dS dS )6z=Diagnostic functions, mainly for use when doing tech support.MIT    N)BytesIO)
HTMLParser)BeautifulSoup__version__)builder_registry)AnyIOListOptionalTupleTYPE_CHECKING)_IncomingMarkupdatar   returnc           	   	   C   s  t dt  t dtj  g d}|D ]}tjD ]	}||jv r! nq|| t d|  qd|v rZ|d zddl	m
} t d	d
tt|j  W n tyY   t d Y nw d|v ryzddl}t d|j  W n tyx   t d Y nw t| dr|  } |D ]:}t d|  d}z
t| |d}d}W n ty   t d|  t  Y nw |rt d|  t |  t d qdS )zDiagnostic suite for isolating common problems.

    :param data: Some markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %s)html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.r   zlxml-xmlr   etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.r   NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.readz#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printr   sysversionr   buildersr   removeappendr   r   joinmapstrLXML_VERSIONImportErrorr   hasattrr   r   	Exception	traceback	print_excprettify)	r   basic_parsersnamebuilderr   r   parsersuccesssoup r0   ?/var/www/html/venv/lib/python3.10/site-packages/bs4/diagnose.pydiagnose    sZ   





r2   Thtmlkwargsc                 K   sz   ddl m} |dd}t| tr| d} t| tst| }|j|f||d|D ]\}}t	d||j
|jf  q+dS )	a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r   recoverTutf8)r3   r5   z%s, %4s, %sN)r   r   pop
isinstancer"   encoder	   r   	iterparser   tagtext)r   r3   r4   r   r5   readereventelementr0   r0   r1   
lxml_traceX   s   


 r@   c                	   @   s   e Zd ZdZdeddfddZ	dded	eeeee f  d
e	ddfddZ
ddede	ddfddZdeddfddZdeddfddZdeddfddZdeddfddZdeddfddZdeddfddZdeddfddZdS ) AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    sr   Nc                 C   s   t | d S )N)r   )selfrB   r0   r0   r1   _pw   s   zAnnouncingParser._pTr+   attrshandle_empty_elementc                 C   s   |  | d| d d S )N z STARTrD   )rC   r+   rE   rF   r0   r0   r1   handle_starttagz   s   z AnnouncingParser.handle_starttagcheck_already_closedc                 C      |  d|  d S )Nz%s ENDrH   )rC   r+   rJ   r0   r0   r1   handle_endtag      zAnnouncingParser.handle_endtagr   c                 C   rK   )Nz%s DATArH   rC   r   r0   r0   r1   handle_data   rM   zAnnouncingParser.handle_datac                 C   rK   )Nz
%s CHARREFrH   rC   r+   r0   r0   r1   handle_charref   rM   zAnnouncingParser.handle_charrefc                 C   rK   )Nz%s ENTITYREFrH   rP   r0   r0   r1   handle_entityref   rM   z!AnnouncingParser.handle_entityrefc                 C   rK   )Nz
%s COMMENTrH   rN   r0   r0   r1   handle_comment   rM   zAnnouncingParser.handle_commentc                 C   rK   )Nz%s DECLrH   rN   r0   r0   r1   handle_decl   rM   zAnnouncingParser.handle_declc                 C   rK   )Nz%s UNKNOWN-DECLrH   rN   r0   r0   r1   unknown_decl   rM   zAnnouncingParser.unknown_declc                 C   rK   )Nz%s PIrH   rN   r0   r0   r1   	handle_pi   rM   zAnnouncingParser.handle_piT)__name__
__module____qualname____doc__r"   rD   r
   r   r   boolrI   rL   rO   rQ   rR   rS   rT   rU   rV   r0   r0   r0   r1   rA   o   s*    
rA   c                 C   s   t  }||  dS )zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)rA   feed)r   r-   r0   r0   r1   htmlparser_trace   s   r^   aeiou_vowelsbcdfghjklmnpqrstvwxyz_consonants   lengthc                 C   s:   d}t | D ]}|d dkrt}nt}|t|7 }q|S )z<Generate a random word-like string.

    :meta private:
        r   )rangerb   r`   randomchoice)rd   rB   itr0   r0   r1   rword   s   rl      c                 C   s   d dd t| D S )z@Generate a random sentence-like string.

    :meta private:
    rG   c                 s   s     | ]}t td dV  qdS )rm   	   N)rl   rh   randint).0rj   r0   r0   r1   	<genexpr>   s    zrsentence.<locals>.<genexpr>)r    rg   )rd   r0   r0   r1   	rsentence   s   rr     num_elementsc                 C   s   g d}g }t | D ]9}tdd}|dkr#t|}|d|  q
|dkr3|ttdd q
|dkrCt|}|d|  q
d	d
| d S )zDRandomly generate an invalid HTML document.

    :meta private:
    )pdivspanrj   bscripttabler      z<%s>   rm   rf   z</%s>z<html>
z</html>)rg   rh   ro   ri   r   rr   r    )rt   	tag_nameselementsrj   ri   tag_namer0   r0   r1   rdoc   s   

r   順 c           	   	   C   s  t dt  t| }t dt|  dddgddfD ]5}d}zt }t|| t }d}W n tyB   t d	|  t  Y nw |rOt d
||| f  qddl	m
} t }|| t }t d||   ddl}| }t }|| t }t d||   dS )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r   r3   r   r   FTr   z"BS4+%s parsed the markup in %.2fs.r   r   z$Raw lxml parsed the markup in %.2fs.Nz(Raw html5lib parsed the markup in %.2fs.)r   r   r   lentimer   r&   r'   r(   r   r   HTMLr   r   parse)	rt   r   parser_namer.   arx   r   r   r-   r0   r0   r1   benchmark_parsers   s8   


r   r   r-   c                 C   sX   t  }|j}t| }tt||d}td||| t	|}|
d |dd dS )z7Use Python's profiler on a randomly generated document.)bs4r   r-   zbs4.BeautifulSoup(data, parser)
cumulativez_html5lib|bs42   N)tempfileNamedTemporaryFiler+   r   dictr   cProfilerunctxpstatsStats
sort_statsprint_stats)rt   r-   
filehandlefilenamer   varsstatsr0   r0   r1   profile   s   

r   __main__)r   r   r   NrW   )rc   )rm   )rs   )r   )r   r   )-r[   __license__r   ior   html.parserr   r   r   r   bs4.builderr   typingr   r	   r
   r   r   r   bs4._typingr   r   rh   r   r   r'   r   r2   r\   r@   rA   r"   r^   r`   __annotations__rb   intrl   rr   r   r   r   rX   stdinr   r0   r0   r0   r1   <module>   s>     	
8,#