o
    Í5÷da   ã                   @   s,   d dl mZ d dlmZ G dd„ deƒZdS )é   )ÚCharSetProber)ÚProbingStatec                       s¨   e Zd ZdZdZdZ‡ fdd„Z‡ fdd„Zedd	„ ƒZ	ed
d„ ƒZ
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zedd„ ƒZd d!„ Z‡  ZS )"ÚUTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
    é   g®Gázî?c                    sj   t ƒ  ¡  d| _dgd | _dgd | _tj| _g d¢| _d| _	d| _
d| _d| _d| _d| _|  ¡  d S )Né    é   ©r   r   r   r   F)ÚsuperÚ__init__ÚpositionÚzeros_at_modÚnonzeros_at_modr   Ú	DETECTINGÚ_stateÚquadÚinvalid_utf16beÚinvalid_utf16leÚinvalid_utf32beÚinvalid_utf32leÚ'first_half_surrogate_pair_detected_16beÚ'first_half_surrogate_pair_detected_16leÚreset©Úself©Ú	__class__© úxC:\Users\jesus\OneDrive\Desktop\erpjis_fastapi\backend\jisbackend\Lib\site-packages\pip/_vendor/chardet/utf1632prober.pyr
   '   s   

zUTF1632Prober.__init__c                    sb   t ƒ  ¡  d| _dgd | _dgd | _tj| _d| _d| _	d| _
d| _d| _d| _g d¢| _d S )Nr   r   Fr   )r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   6   s   
zUTF1632Prober.resetc                 C   s4   |   ¡ rdS |  ¡ rdS |  ¡ rdS |  ¡ rdS dS )Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)Úis_likely_utf32beÚis_likely_utf32leÚis_likely_utf16beÚis_likely_utf16ler   r   r   r   Úcharset_nameD   s   zUTF1632Prober.charset_namec                 C   s   dS )NÚ r   r   r   r   r   ÚlanguageQ   s   zUTF1632Prober.languagec                 C   ó   t d| jd ƒS )Nç      ð?g      @©Úmaxr   r   r   r   r   Úapprox_32bit_charsU   ó   z UTF1632Prober.approx_32bit_charsc                 C   r%   )Nr&   g       @r'   r   r   r   r   Úapprox_16bit_charsX   r*   z UTF1632Prober.approx_16bit_charsc                 C   sj   |   ¡ }|| jko4| jd | | jko4| jd | | jko4| jd | | jko4| jd | | jko4| j S ©Nr   r   é   é   )r)   ÚMIN_CHARS_FOR_DETECTIONr   ÚEXPECTED_RATIOr   r   ©r   Úapprox_charsr   r   r   r   [   ó   
ÿþýûzUTF1632Prober.is_likely_utf32bec                 C   sj   |   ¡ }|| jko4| jd | | jko4| jd | | jko4| jd | | jko4| jd | | jko4| j S r,   )r)   r/   r   r0   r   r   r1   r   r   r   r   e   r3   zUTF1632Prober.is_likely_utf32lec                 C   óV   |   ¡ }|| jko*| jd | jd  | | jko*| jd | jd  | | jko*| j S )Nr   r.   r   r-   )r+   r/   r   r0   r   r   r1   r   r   r   r    o   ó   
ÿÿþûzUTF1632Prober.is_likely_utf16bec                 C   r4   )Nr   r-   r   r.   )r+   r/   r   r0   r   r   r1   r   r   r   r!   y   r5   zUTF1632Prober.is_likely_utf16lec                 C   s¶   |d dks&|d dks&|d dkr)|d dkr)d|d   kr$dkr)n nd| _ |d dksP|d dksP|d dkrU|d dkrWd|d   krMdkrYn d	S d| _d	S d	S d	S d	S )
zÖ
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r   r   é   éØ   r-   éß   Tr.   N)r   r   )r   r   r   r   r   Úvalidate_utf32_charactersƒ   s   
46
þz'UTF1632Prober.validate_utf32_charactersc                 C   s   | j s'd|d   krdkrn nd| _ n'd|d   kr!dkr&n nd| _nd|d   kr3dkr9n nd| _ nd| _| jshd|d   krKdkrRn nd| _d	S d|d   kr^dkrfn d	S d| _d	S d	S d|d   krtdkr{n nd| _d	S d| _d	S )
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        r7   r   éÛ   TéÜ   r8   Fr   N)r   r   r   r   )r   Úpairr   r   r   Úvalidate_utf16_characters™   s$   
€

ÿ

z'UTF1632Prober.validate_utf16_charactersc                 C   sœ   |D ]H}| j d }|| j|< |dkr,|  | j¡ |  | jdd… ¡ |  | jdd… ¡ |dkr:| j|  d7  < n	| j|  d7  < |  j d7  _ q| jS )Nr   r.   r   r-   r   )r   r   r9   r=   r   r   Ústate)r   Úbyte_strÚcÚmod4r   r   r   Úfeed¹   s   

zUTF1632Prober.feedc                 C   sJ   | j tjtjhv r| j S |  ¡ dkrtj| _ | j S | jdkr"tj| _ | j S )Ngš™™™™™é?i   )r   r   ÚNOT_MEÚFOUND_ITÚget_confidencer   r   r   r   r   r>   È   s   
üzUTF1632Prober.statec                 C   s(   |   ¡ s|  ¡ s|  ¡ s|  ¡ rdS dS )Ng333333ë?g        )r!   r    r   r   r   r   r   r   rE   Õ   s   þýüûÿøzUTF1632Prober.get_confidence)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r/   r0   r
   r   Úpropertyr"   r$   r)   r+   r   r   r    r!   r9   r=   rB   r>   rE   Ú__classcell__r   r   r   r   r      s,    	





 
r   N)Úcharsetproberr   Úenumsr   r   r   r   r   r   Ú<module>   s   