o
    6d5.                     @   s   d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZmZ G dd	 d	ZG d
d dZeeef Ze
e ZG dd dZdS )    )aliases)sha256)dumps)AnyDictIteratorListOptionalTupleUnion   )TOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                   @   s  e Zd Z	d=dededededddee fd	d
Zde	defddZ
de	defddZedefddZdefddZdefddZd>ddZedefddZedee fddZedefddZedefdd Zedee fd!d"Zedefd#d$Zedefd%d&Zedefd'd(Zedefd)d*Zedefd+d,Zedefd-d.Zeded  fd/d0Zedefd1d2Zedee fd3d4Zedee fd5d6Z d?d8edefd9d:Z!edefd;d<Z"dS )@CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                 C   sF   || _ || _|| _|| _|| _d | _g | _d| _d | _d | _	|| _
d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leavesZ_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r    r%   pC:\Users\jesus\OneDrive\Desktop\erpjis_fastapi\backend\jisbackend\Lib\site-packages\charset_normalizer/models.py__init__   s   	
zCharsetMatch.__init__otherreturnc                 C   s>   t |tstdt|jt| j| j|jko| j|jkS )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr$   r(   r%   r%   r&   __eq__$   s   
zCharsetMatch.__eq__c                 C   sv   t |tstt| j|j }t| j|j }|dk r5|dkr5|dkr/| j|jkr/| j|jkS | j|jkS | j|jk S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?r   )r*   r   
ValueErrorabschaos	coherencemulti_byte_usage)r$   r(   Zchaos_differenceZcoherence_differencer%   r%   r&   __lt__-   s   
zCharsetMatch.__lt__c                 C   s   dt t| t | j  S )Ng      ?)lenr-   rawr$   r%   r%   r&   r7   @   s   zCharsetMatch.multi_byte_usagec                 C   s"   | j d u rt| j| jd| _ | j S )Nstrict)r#   r-   r   r   r;   r%   r%   r&   __str__D   s   
zCharsetMatch.__str__c                 C   s   d | j| jS )Nz<CharsetMatch '{}' bytes({})>)r,   r/   r0   r;   r%   r%   r&   __repr__J      zCharsetMatch.__repr__c                 C   s8   t |tr	|| krtd|jd |_| j| d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r*   r   r3   r,   r.   r#   r    appendr1   r%   r%   r&   add_submatchM   s   zCharsetMatch.add_submatchc                 C      | j S N)r   r;   r%   r%   r&   r/   X      zCharsetMatch.encodingc                 C   sD   g }t  D ]\}}| j|kr|| q| j|kr|| q|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr/   r@   )r$   Zalso_known_asupr%   r%   r&   encoding_aliases\   s   


zCharsetMatch.encoding_aliasesc                 C   rB   rC   r   r;   r%   r%   r&   bomi   rD   zCharsetMatch.bomc                 C   rB   rC   rI   r;   r%   r%   r&   byte_order_markm   rD   zCharsetMatch.byte_order_markc                 C   s   dd | j D S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                 S   s   g | ]}|d  qS )r   r%   ).0er%   r%   r&   
<listcomp>w       z*CharsetMatch.languages.<locals>.<listcomp>r   r;   r%   r%   r&   r   q   s   zCharsetMatch.languagesc                 C   sp   | j s1d| jv r
dS ddlm}m} t| jr|| jn|| j}t|dks+d|v r-dS |d S | j d d S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiZEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r   could_be_from_charsetZcharset_normalizer.cdrR   rS   r   r/   r9   )r$   rR   rS   r   r%   r%   r&   languagey   s   
zCharsetMatch.languagec                 C   rB   rC   )r   r;   r%   r%   r&   r5      rD   zCharsetMatch.chaosc                 C   s   | j sdS | j d d S )Nr   r   r   rP   r;   r%   r%   r&   r6      s   zCharsetMatch.coherencec                 C      t | jd ddS Nd      )ndigits)roundr5   r;   r%   r%   r&   percent_chaos      zCharsetMatch.percent_chaosc                 C   rW   rX   )r\   r6   r;   r%   r%   r&   percent_coherence   r^   zCharsetMatch.percent_coherencec                 C   rB   )z+
        Original untouched bytes.
        )r   r;   r%   r%   r&   r:      s   zCharsetMatch.rawc                 C   rB   rC   )r    r;   r%   r%   r&   submatch   rD   zCharsetMatch.submatchc                 C      t | jdkS Nr   )r9   r    r;   r%   r%   r&   has_submatch   s   zCharsetMatch.has_submatchc                 C   s@   | j d ur| j S dd t| D }ttdd |D | _ | j S )Nc                 S   s   g | ]}t |qS r%   )r   )rL   charr%   r%   r&   rN      s    z*CharsetMatch.alphabets.<locals>.<listcomp>c                 S   s   h | ]}|r|qS r%   r%   )rL   rr%   r%   r&   	<setcomp>   rO   z)CharsetMatch.alphabets.<locals>.<setcomp>)r   r-   sortedlist)r$   Zdetected_rangesr%   r%   r&   	alphabets   s   
zCharsetMatch.alphabetsc                 C   s   | j gdd | jD  S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                 S   s   g | ]}|j qS r%   )r/   )rL   mr%   r%   r&   rN      s    z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>)r   r    r;   r%   r%   r&   rU      s   z"CharsetMatch.could_be_from_charsetutf_8r/   c                 C   s2   | j du s
| j |kr|| _ t| |d| _| jS )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        Nreplace)r"   r-   encoder!   )r$   r/   r%   r%   r&   output   s   zCharsetMatch.outputc                 C   s   t |   S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   rn   	hexdigestr;   r%   r%   r&   r0      s   zCharsetMatch.fingerprintrC   )r(   r   r)   N)rk   )#__name__
__module____qualname__bytesr-   floatboolr	   r'   objectr2   r8   propertyr7   r=   r>   rA   r/   r   rH   rJ   rK   r   rV   r5   r6   r]   r_   r:   r`   rc   ri   rU   rn   r0   r%   r%   r%   r&   r   
   sn    
	
r   c                   @   s   e Zd ZdZddeee  fddZdee fddZ	d	e
eef defd
dZdefddZdefddZd	eddfddZded fddZded fddZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 C   s   |r	t || _d S g | _d S rC   )rg   _results)r$   ry   r%   r%   r&   r'      s   zCharsetMatches.__init__r)   c                 c   s    | j E d H  d S rC   rz   r;   r%   r%   r&   __iter__   s   zCharsetMatches.__iter__itemc                 C   sJ   t |tr
| j| S t |tr#t|d}| jD ]}||jv r"|  S qt)z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r*   intrz   r-   r   rU   KeyError)r$   r}   resultr%   r%   r&   __getitem__   s   





zCharsetMatches.__getitem__c                 C   s
   t | jS rC   r9   rz   r;   r%   r%   r&   __len__   s   
zCharsetMatches.__len__c                 C   ra   rb   r   r;   r%   r%   r&   __bool__   s   zCharsetMatches.__bool__c                 C   s|   t |tstdt|jt|jtkr0| j	D ]}|j
|j
kr/|j|jkr/||  dS q| j	| t| j	| _	dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r*   r   r3   r,   r-   r.   r9   r:   r   rz   r0   r5   rA   r@   rg   )r$   r}   matchr%   r%   r&   r@      s   


zCharsetMatches.appendr   c                 C   s   | j sdS | j d S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r{   r;   r%   r%   r&   best  s   
zCharsetMatches.bestc                 C   s   |   S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   r;   r%   r%   r&   first  s   zCharsetMatches.firstrC   )rp   rq   rr   __doc__r	   r   r   r'   r   r|   r   r~   r-   r   r   ru   r   r@   r   r   r%   r%   r%   r&   rx      s    rx   c                   @   s~   e Zd Zdedee dee dee dedee deded	ed
ee defddZe	de
eef fddZdefddZdS )CliDetectionResultpathr/   rH   alternative_encodingsrV   ri   r   r5   r6   unicode_pathis_preferredc                 C   sF   || _ |
| _|| _|| _|| _|| _|| _|| _|| _|	| _	|| _
d S rC   )r   r   r/   rH   r   rV   ri   r   r5   r6   r   )r$   r   r/   rH   r   rV   ri   r   r5   r6   r   r   r%   r%   r&   r'   &  s   
zCliDetectionResult.__init__r)   c                 C   s2   | j | j| j| j| j| j| j| j| j| j	| j
dS )Nr   r/   rH   r   rV   ri   r   r5   r6   r   r   r   r;   r%   r%   r&   __dict__@  s   zCliDetectionResult.__dict__c                 C   s   t | jdddS )NT   )ensure_asciiindent)r   r   r;   r%   r%   r&   to_jsonP  r?   zCliDetectionResult.to_jsonN)rp   rq   rr   r-   r	   r   ru   rt   r'   rw   r   r   r   r   r%   r%   r%   r&   r   %  s6    	

r   N)Zencodings.aliasesr   hashlibr   jsonr   typingr   r   r   r   r	   r
   r   Zconstantr   utilsr   r   r   r   rx   r-   rt   ZCoherenceMatchr   r   r%   r%   r%   r&   <module>   s    $ UC