o
    Í5÷dÁ  ã                   @   s8   d dl Z d dlZddlmZ e d¡ZG dd„ dƒZdS )é    Né   )ÚProbingStates%   [a-zA-Z]*[€-ÿ]+[a-zA-Z]*[^a-zA-Z€-ÿ]?c                   @   sn   e Zd ZdZddd„Zdd„ Zedd„ ƒZd	d
„ Zedd„ ƒZ	dd„ Z
edd„ ƒZedd„ ƒZedd„ ƒZdS )ÚCharSetProbergffffffî?Nc                 C   s   d | _ || _t t¡| _d S ©N)Ú_stateÚlang_filterÚloggingÚ	getLoggerÚ__name__Úlogger)Úselfr   © r   úxC:\Users\jesus\OneDrive\Desktop\erpjis_fastapi\backend\jisbackend\Lib\site-packages\pip/_vendor/chardet/charsetprober.pyÚ__init__+   s   zCharSetProber.__init__c                 C   s   t j| _d S r   )r   Ú	DETECTINGr   ©r   r   r   r   Úreset0   s   zCharSetProber.resetc                 C   s   d S r   r   r   r   r   r   Úcharset_name3   s   zCharSetProber.charset_namec                 C   s   t ‚r   )ÚNotImplementedError)r   Úbyte_strr   r   r   Úfeed7   ó   zCharSetProber.feedc                 C   s   | j S r   )r   r   r   r   r   Ústate:   s   zCharSetProber.statec                 C   s   dS )Ng        r   r   r   r   r   Úget_confidence>   r   zCharSetProber.get_confidencec                 C   s   t  dd| ¡} | S )Ns   ([ -])+ó    )ÚreÚsub)Úbufr   r   r   Úfilter_high_byte_onlyA   s   z#CharSetProber.filter_high_byte_onlyc                 C   sZ   t ƒ }t | ¡}|D ] }| |dd… ¡ |dd… }| ¡ s%|dk r%d}| |¡ q
|S )u7  
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [Â€-Ã¿]
        marker: everything else [^a-zA-ZÂ€-Ã¿]
        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.
        This filter applies to all scripts which do not use English characters.
        Néÿÿÿÿó   €r   )Ú	bytearrayÚINTERNATIONAL_WORDS_PATTERNÚfindallÚextendÚisalpha)r   ÚfilteredÚwordsÚwordÚ	last_charr   r   r   Úfilter_international_wordsF   s   
z(CharSetProber.filter_international_wordsc                 C   s’   t ƒ }d}d}t| ƒ d¡} t| ƒD ])\}}|dkr!|d }d}q|dkr;||kr9|s9| | ||… ¡ | d¡ d}q|sG| | |d	… ¡ |S )
a[  
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        Fr   Úcó   >r   ó   <r   TN)r!   Ú
memoryviewÚcastÚ	enumerater$   )r   r&   Úin_tagÚprevÚcurrÚbuf_charr   r   r   Úremove_xml_tagsh   s"   	
€zCharSetProber.remove_xml_tagsr   )r
   Ú
__module__Ú__qualname__ÚSHORTCUT_THRESHOLDr   r   Úpropertyr   r   r   r   Ústaticmethodr   r*   r5   r   r   r   r   r   '   s     




!r   )r   r   Úenumsr   Úcompiler"   r   r   r   r   r   Ú<module>   s   ÿ