o
    hu                  
   @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZ zddlZW n eeefy>   dZY nw ddlmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZ ddlmZ edZedZ e!dZ"G dd dej#j$Z%G dd dej&Z'G dd dZ(G dd dZ)G dd deZ*G dd deZ+edZ,dd Z-G dd  d eZ.d!d" Z/d*d$d%Z0d&d' Z1e1  d(d) Z2dS )+z(This file is largely copied from http.py    N)copy)dumpsloads)urlparseF)_DEFAULT_CALLBACK)register_implementation)AbstractBufferedFileAbstractFileSystem)DEFAULT_BLOCK_SIZE
isfilelikenullcontexttokenize   )AllBytesz6<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)z-(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)zfsspec.httpc                   @      e Zd ZdS )JsHttpExceptionN__name__
__module____qualname__ r   r   t/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/fsspec/implementations/http_sync.pyr      s    r   c                   @   r   )StreamIONr   r   r   r   r   r   !   s    r   c                   @   s   e Zd ZdZdddZedd Zdd Zed	d
 Zedd Z	dd Z
dd Zedd Zedd Zedd Zedd Zedd Zdd ZdS )ResponseProxyzLooks like a requests responseFc                 C   s   || _ || _d | _d | _d S N)requeststream_data_headers)selfreqr   r   r   r   __init__*   s   
zResponseProxy.__init__c                 C   s8   | j d u r| jj }| jrt|| _ | j S || _ | j S r   )r   r   responseto_bytesr   r   )r   br   r   r   raw0   s   

zResponseProxy.rawc                 C   s   t | dr	| `d S d S )Nr   )hasattrr   r   r   r   r   close:   s   
zResponseProxy.closec                 C   s4   | j d u rtdd | j  dD | _ | j S )Nc                 S   s   g | ]}| d qS ): )split).0_r   r   r   
<listcomp>B   s    z)ResponseProxy.headers.<locals>.<listcomp>z
)r   dictr   getAllResponseHeadersstripr*   r'   r   r   r   headers>   s   
zResponseProxy.headersc                 C   s   t | jjS r   )intr   statusr'   r   r   r   status_codeI   s   zResponseProxy.status_codec                 C   s"   | j st| j| j| j| jd d S r   )okr   urlr4   reasonr1   r'   r   r   r   raise_for_statusM   s
   zResponseProxy.raise_for_statusc                 o   s"    	 | j |}|r|V  nd S qr   )r%   read)r   	chunksizer,   __outr   r   r   iter_contentS   s   zResponseProxy.iter_contentc                 C   s   | j jS r   )r   
statusTextr'   r   r   r   r7   [   s   zResponseProxy.reasonc                 C   s
   | j dk S )N  )r4   r'   r   r   r   r5   _      
zResponseProxy.okc                 C   s
   | j jjS r   )r   r"   responseURLr'   r   r   r   r6   c   r@   zResponseProxy.urlc                 C   s
   | j  S r   )contentdecoder'   r   r   r   textg   s   
zResponseProxy.textc                 C   s   d| _ | jS NF)r   r%   r'   r   r   r   rB   l   s   zResponseProxy.contentc                 C   s
   t | jS r   )r   rD   r'   r   r   r   jsonq      
zResponseProxy.jsonN)F)r   r   r   __doc__r!   propertyr%   r(   r1   r4   r8   r=   r7   r5   r6   rD   rB   rF   r   r   r   r   r   '   s.    

	







r   c                   @   sj   e Zd Zdd Z														dddZdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dS )RequestsSessionShimc                 C   s
   i | _ d S r   )r1   r'   r   r   r   r!   v   rG   zRequestsSessionShim.__init__Nc                 C   s>  ddl m}m} td|| |s|s|s|s|s|rt|r%|r%td| }|r-|nd}|r<| dtj	
| }|j||dg|R   |	rL|	|_|r]| D ]
\}}||| qR|dd	 d
|_|ry|t|gtdi}|| n |rt|tjr| }||gtd	i}|| n|d  t||dS )Nr   )BlobXMLHttpRequestzJS request: %s %szUse json= or data=, not bothr   ?FAcceptzapplication/octet-streamarraybufferzapplication/json)r   )jsrK   rL   loggerdebugNotImplementedError
ValueErrornewurllibparse	urlencodeopentimeoutitemssetRequestHeaderresponseTyper   typesend
isinstanceioIOBaser9   r   )r   methodr6   paramsdatar1   cookiesfilesauthrZ   allow_redirectsproxieshooksr   verifycertrF   rK   rL   r    extrakvblobr   r   r   r   y   s8   
zRequestsSessionShim.requestc                 K      | j d|fi |S )NGETr   r   r6   kwargsr   r   r   get      zRequestsSessionShim.getc                 K   rr   )NHEADrt   ru   r   r   r   head   rx   zRequestsSessionShim.headc                 K   rr   )NzPOST}rt   ru   r   r   r   post   rx   zRequestsSessionShim.postc                 K   rr   )NPUTrt   ru   r   r   r   put   rx   zRequestsSessionShim.putc                 K   rr   )NPATCHrt   ru   r   r   r   patch   rx   zRequestsSessionShim.patchc                 K   rr   )NDELETErt   ru   r   r   r   delete   rx   zRequestsSessionShim.delete)NNNNNNNNNNNNNN)r   r   r   r!   r   rw   rz   r{   r}   r   r   r   r   r   r   rJ   u   s.    
4rJ   c                       s  e Zd ZdZdZdZ							d3 fdd		Zed
d Zdd Z	e
dedefddZe
 fddZd4ddZd4ddZdd Zd5ddZdefddZdedfd d!Zd"d# Zd$d% Zd&d' Z	(					d6d)d*Zd+d, Zd-d. Zd7d/d0Zd1d2 Z  ZS )8HTTPFileSystema<  
    Simple File-System for fetching data via HTTP(S)

    This is the BLOCKING version of the normal HTTPFileSystem. It uses
    requests in normal python and the JS runtime in pyodide.

    ***This implementation is extremely experimental, do not use unless
    you are testing pyodide/pyscript integration***
    )httphttps	sync-http
sync-https/TN	readaheadFc              
      s  t  j| fi | |dur|nt| _|| _|| _|| _|| _|p"i | _|| _	|| _
zddl}	td t | _d| _W n* tyf }
 zddl}td|
 |jdi |pUi | _d| _W Y d}
~
nd}
~
ww t|}|dd| _|dd |d	d |d
d || _
dS )a  

        Parameters
        ----------
        block_size: int
            Blocks to read bytes; if 0, will default to raw requests file-like
            objects instead of HTTPFile instances
        simple_links: bool
            If True, will consider both HTML <a> tags and anything that looks
            like a URL; if False, will consider only the former.
        same_scheme: True
            When doing ls/glob, if this is True, only consider paths that have
            http/https matching the input URLs.
        size_policy: this argument is deprecated
        client_kwargs: dict
            Passed to aiohttp.ClientSession, see
            https://docs.aiohttp.org/en/stable/client_reference.html
            For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
        storage_options: key-value
            Any other parameters passed on to requests
        cache_type, cache_options: defaults used in open
        Nr   zStarting JS sessionTz'Starting cpython session because of: %sFuse_listings_cachelistings_expiry_time	max_pathsskip_instance_cacher   )superr!   r
   
block_sizesimple_linkssame_schema
cache_typecache_optionsclient_kwargsencodedrv   rP   rQ   rR   rJ   session	ExceptionrequestsSessionr   popr   )r   r   r   same_schemer   r   r   r   storage_optionsrP   er   request_options	__class__r   r   r!      s6   !



zHTTPFileSystem.__init__c                 C   s   dS )Nr   r   r'   r   r   r   fsid  s   zHTTPFileSystem.fsidc                 C   s   t r
t j|| jdS |S )N)r   )yarlURLr   r   r6   r   r   r   
encode_url  s   zHTTPFileSystem.encode_urlpathreturnc                 C   s   | dd dd}|S )z-For HTTP, we always want to keep the full URLzsync-http://zhttp://zsync-https://zhttps://)replace)clsr   r   r   r   _strip_protocol  s   zHTTPFileSystem._strip_protocolc                    s    t  |}t|dkr|S dS )N    )r   _parentlen)r   r   parr   r   r   r     s   zHTTPFileSystem._parentc              	   K   s  | j  }|| t| | jj| |fi | j }| || |j	}| j
r9t|dd t|D  }n
dd t|D }t }t|}	|D ]d}
t|
trW|
d }
|
drlt|
dkrl|	jd |	j |
 }
|
dr| jr|
|dd r||
 qL|
dd|dddd r||
 qL|
d	vr|d|d|
dg qL|s|dr| j|dd
d}|rdd |D S t|S )Nc                 S      g | ]}|d  qS r   r   r+   ur   r   r   r-   /      z+HTTPFileSystem._ls_real.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r-   1  r      r   z://r   r   )z..z../F)detailc                 S   s&   g | ]}|d | drdnddqS )Nr   	directoryfilenamesizer^   )endswithr   r   r   r   r-   I  s    )rv   r   updaterQ   rR   r   rw   r   _raise_not_found_for_statusrD   r   ex2findallexsetr   r`   tuple
startswithr   schemenetlocr   rstripaddr   joinlstripr   _ls_realsorted)r   r6   r   rv   kwrrD   linksr<   partslr   r   r   r   &  sD   


 


 	zHTTPFileSystem._ls_realc                 K   sB   | j r|| jv r| j| }|S | j|fd|i|}|| j|< |S )Nr   )r   dircacher   )r   r6   r   rv   r<   r   r   r   lsT  s   

zHTTPFileSystem.lsc                 C   s   |j dkr	t||  dS )zU
        Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
        i  N)r4   FileNotFoundErrorr8   )r   r"   r6   r   r   r   r   \  s   
z*HTTPFileSystem._raise_not_found_for_statusc                 K   s   | j  }|| t| |d us|d ur2||krdS |di  }| ||||d< ||d< | jj| 	|fi |}| 
|| |jS )N    r1   Range)rv   r   r   rQ   rR   r   _process_limitsr   rw   r   r   rB   )r   r6   startendrv   r   r1   r   r   r   r   cat_filed  s   


zHTTPFileSystem.cat_filei  P c           
   
   K   s   | j  }|| t| | jj| |fi |}zt|j	dd p+|j	dd }W n t
ttfy<   d }Y nw || | || t|sQt|d}|j|ddD ]}	||	 |t|	 qXd S )Ncontent-lengthContent-LengthwbF)decode_unicode)rv   r   r   rQ   rR   r   rw   r   r2   r1   rT   KeyError	TypeErrorset_sizer   r   rY   r=   writerelative_updater   )
r   rpathlpath
chunk_sizecallbackrv   r   r   r   chunkr   r   r   get_filet  s(   





zHTTPFileSystem.get_filer{   c                    st    fdd}| j  }|| | }|dvr!td|t| j|}	|	|fd| i|}
| |
| d S )Nc                  3   s    t tjrt} d}ntd} d}| C}|r) |dd |d n	 t|dd  |}|rR|V   	t
| |}|s9W d    d S W d    d S 1 s]w   Y  d S )NFrbTr   r   r   )r`   ra   rb   r   rY   r   seekgetattrr9   r   r   )contextuse_seekfr   r   r   r   r   r   
gen_chunks  s(   


"z+HTTPFileSystem.put_file.<locals>.gen_chunks)r{   r}   z.method has to be either 'post' or 'put', not: re   )rv   r   r   lowerrT   r   r   r   )r   r   r   r   r   rc   rv   r   r   methrespr   r   r   put_file  s   	

zHTTPFileSystem.put_filec                 C   s   d}d}|dur&|dk r&|du r| }d}d}n|p |  |d }|| }n|du r,d}|sW|durH|dk rH|durG|pB|  |d }|| }n|du rNd}t|trW|d8 }d| d	| S )
z"Helper for "Range"-based _cat_fileNFr   r   Tr   r   bytes=-)infor`   r2   )r   r6   r   r   r   suffr   r   r   r     s,   

zHTTPFileSystem._process_limitsc                 K   sZ   | j  }|| zt| | jj| |fi |}|jdk W S  t	y,   Y dS w )Nr?   F)
rv   r   r   rQ   rR   r   rw   r   r4   r   )r   r   rv   r   r   r   r   r   exists  s   


zHTTPFileSystem.existsc                 K   s   | j |fi |S r   )r   )r   r   rv   r   r   r   isfile  s   zHTTPFileSystem.isfiler   c           
   	   K   s   |dkrt |dur|n| j}| j }	|	| |p%| j|fi |d }|rA|rAt| |f| j||||p7| j|p;| j	d|	S t
| |f|| jd|	S )a  Make a file-like object

        Parameters
        ----------
        path: str
            Full URL with protocol
        mode: string
            must be "rb"
        block_size: int or None
            Bytes to download in one request; use instance value if None. If
            zero, will return a streaming Requests file-like instance.
        kwargs: key-value
            Any other parameters, passed to requests calls
        r   Nr   )r   r   moder   r   r   )r   r   )rS   r   rv   r   r   r   HTTPFiler   r   r   HTTPStreamFile)
r   r   r   r   
autocommitr   r   r   rv   r   r   r   r   _open  s:   

	zHTTPFileSystem._openc                 C   s   t || j| jS )z;Unique identifier; assume HTTP files are static, unchanging)r   rv   protocolr   r   r   r   ukey  s   zHTTPFileSystem.ukeyc                 K   s   i }dD ]E}z!| t| |f|| jd| j| |ddur&W  n$W q tyI } z|dkr8t||t	t
| W Y d}~qd}~ww |dd|ddiS )	aH  Get info of URL

        Tries to access location via HEAD, and then GET methods, but does
        not fetch the data.

        It is possible that the server does not supply any size information, in
        which case size will be given as None (and certain operations on the
        corresponding file will not work).
        )rz   rw   )size_policyr   r   Nrw   r   r   r^   r   )r   
_file_infor   r   rv   rw   r   r   rQ   rR   str)r   r6   rv   r   policyexcr   r   r   r     s2   
	
zHTTPFileSystem.infoc                    s"  ddl }|d}| |}|ddkr|dnt|}|ddkr*|dnt|}t||}|dd}	t|s_|}
d}|rH|d	7 }nY| |rY|	sR|gS || 	|iS |	s]g S i S d|d| v r|d| 
d}|d|d  }
d
|v r~dn||d d dd }nd}
d
|v rdn||d d dd }| j|
f|p|ddd| d|dddddddddddddddddddd d!d"d d |d#d$|d%d&|d$d' fd(d)t D }|	r|S t|S )*a  
        Find files by glob-matching.

        This implementation is idntical to the one in AbstractFileSystem,
        but "?" is not considered as a character for globbing, because it is
        so common in URLs, often identifying the "query" part.
        r   Nr   *[r   Fr   z/*z**r   T)maxdepthwithdirsr   ^\z\\.z\.+z\+//(z\()z\)|z\|z\^$z\${z\{}z\}z[*]{2}z=PLACEHOLDER=z[*]z[^/]*z.*c                    s.   i | ]} |d ddr| | qS )r  r   )matchr   r   )r+   pallpathspatternr   r   
<dictcomp>}  s    z'HTTPFileSystem.glob.<locals>.<dictcomp>)rer   r   findr   minr   	has_magicr   r   rindexcountr   r   subcompiler   list)r   r   r
  rv   r  endsindstarindbraceindr   rootdepthind2r<   r   r  r   glob7  st   

  


(&

zHTTPFileSystem.globc              	   C   s*   zt | |W S  ttfy   Y dS w rE   )boolr   r   rT   )r   r   r   r   r   isdir  s
   zHTTPFileSystem.isdir)TNTr   NNF)T)NN)r   NNNNNr   )r   r   r   rH   r   sepr!   rI   r   r   classmethodr  r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r-  r/  __classcell__r   r   r   r   r      sV    
?


.


-

4
 Pr   c                       sT   e Zd ZdZ						d fdd	Zd fdd		Zd
d Zdd Zdd Z  Z	S )r   a  
    A file-like object pointing to a remove HTTP(S) resource

    Supports only reading, with read-ahead of a predermined block-size.

    In the case that the server does not supply the filesize, only reading of
    the complete file in one go is supported.

    Parameters
    ----------
    url: str
        Full URL of the remote resource, including the protocol
    session: requests.Session or None
        All calls will be made within this session, to avoid restarting
        connections where the server allows this
    block_size: int or None
        The amount of read-ahead to do, in bytes. Default is 5MB, or the value
        configured for the FileSystem creating this file
    size: None or int
        If given, this is the size of the file in bytes, and we don't attempt
        to call the server to find the value.
    kwargs: all other key-values are passed to requests calls.
    Nr   bytesc	           
   	      sN   |dkrt d|| _|| _||dd| _t jd||||||d|	 d S )Nr   zFile mode not supportedr   r   )fsr   r   r   r   r   r   )rS   r6   r   detailsr   r!   )
r   r4  r6   r   r   r   r   r   r   rv   r   r   r   r!     s   
zHTTPFile.__init__c                    sj   |dk r| j dkr| jdur| j| jks|   | jdu r&|dk r%|   n	t| j| j  |}t |S )a5  Read bytes from file

        Parameters
        ----------
        length: int
            Read up to this many bytes. If negative, read all content to end of
            file. If the server has not supplied the filesize, attempting to
            read only part of the data will raise a ValueError.
        r   N)locr   	blocksize
_fetch_allr  r   r9   )r   lengthr   r   r   r9     s   
zHTTPFile.readc                 C   sr   t d|   t| jts7| jj| j| j	fi | j
}|  |j}tt|dd|d| _t|| _dS dS )zRead whole file in one shot, without caching

        This is only called when position is still at zero,
        and read() is called without a byte-count.
        zFetch all for N)r   fetcherr8  re   )rQ   rR   r`   cacher   r   rw   r4  r   r6   rv   r8   rB   r   r   )r   r   r<   r   r   r   r9    s    zHTTPFile._fetch_allc                 C   sx   | dd}td|}|sdS |d dkrd }}ndd	 |d d
D \}}|d dkr1dnt|d }|||fS )zParse the Content-Range headerContent-Ranger   zbytes (\d+-\d+|\*)/(\d+|\*))NNNr   r  Nc                 S   s   g | ]}t |qS r   )r2   )r+   xr   r   r   r-     r   z1HTTPFile._parse_content_range.<locals>.<listcomp>r   r   )rw   r  r  r*   r2   )r   r1   smr   r   totalr   r   r   _parse_content_range  s   

zHTTPFile._parse_content_rangec           
      C   sL  t d|  d| d|  | j }|di  }d| d|d  |d< t d| j|d  | jj| j	| jfd|i|}|j
d	krId
S |  |jd|jd|d }|j
dkpq| |jd |kpqt||| k}|ry|j}|S |dkrtdd}g }|ddD ]}	||	 |t|	7 }qd
|d||  }|S )a3  Download a block of data

        The expectation is that the server returns only the requested bytes,
        with HTTP code 206. If this is not the case, we first check the headers,
        and then stream the output - if the data size is bigger than we
        requested, an exception is raised.
        zFetch range for r)   r   r1   r   r   r   z%s : %si  r   r   r      r   zThe HTTP server doesn't appear to support range requests. Only reading this file from the beginning is supported. Open with block_size=0 for a streaming file interface.i   FN)rQ   rR   rv   r   r   r6   r   rw   r4  r   r4   r8   r1   rB  r2   rB   rT   r=   appendr   r   )
r   r   r   rv   r1   r   clresponse_is_ranger<   r   r   r   r   _fetch_range  s:   
"


zHTTPFile._fetch_range)NNr   r3  NNr6  )
r   r   r   rH   r!   r9   r9  rB  rG  r2  r   r   r   r   r     s    r   z([*[])c                 C   s   t | }|d uS r   )magic_checksearch)r?  r  r   r   r   r   ,  s   
r   c                       s8   e Zd Zd fdd	Zdd Zddd	Zd
d Z  ZS )r   r   Nc                    s   || _ || _|dkrt|d d| _t jd
|||dd| | jj| j|fddi|}| j	|| |
dd| _d	| _|| _d S )Nr   r  r   )r4  r   r   r   r   Ti   Fr   r   )r6   r   rT   r5  r   r!   rw   r4  r   r   r=   itleftoverr   )r   r4  r6   r   r   rv   r   r   r   r   r!   2  s    
zHTTPStreamFile.__init__c                 O   s   t d)NzCannot seek streaming HTTP file)rT   )r   argsrv   r   r   r   r   A  s   zHTTPStreamFile.seekr6  c                 C   s   | j g}t| j }||k s|dk r9z| j }W n	 ty!   Y nw |r*|| nn|t|7 }||k s|dk sd|}|dkrP||d  | _ |d | }nd| _ |  jt|7  _|S )Nr   r   )rL  r   rK  __next__StopIterationrD  r   r7  )r   numbufslengr<   r   r   r   r9   D  s(   


zHTTPStreamFile.readc                 C   s   | j   d| _d S )NT)r   r(   closedr'   r   r   r   r(   Z  s   

zHTTPStreamFile.close)r   NrH  )r   r   r   r!   r   r9   r(   r2  r   r   r   r   r   1  s
    
r   c                 K   sT   |  }|di   }d| d|d  |d< | j|fd|i|}|  |jS )Nr1   r   r   r   r   )r   r   rw   r8   rB   )r   r6   r   r   rv   r1   r   r   r   r   	get_range_  s   rT  rz   c           	      K   sD  t d|  | }|dd}|di  }||d< i }|dkr.|j| fd|i|}n|dkr>|j| fd|i|}ntd| |  d|jv rXt	|jd |d	< n6d
|jv rlt	|jd
 
dd |d	< n"d|jv r{t	|jd |d	< nd|jv rt	|jd 
dd |d	< dD ]}|j|r|j| ||< q|S )zCall HEAD on the server to get details about the file (size/checksum etc.)

    Default operation is to explicitly allow redirects and use encoding
    'identity' (no compression) to get the true size of the target.
    zRetrieve file size for %sri   Tr1   rz   rw   z)size_policy must be "head" or "get", got r   r   r=  r   r   r   zcontent-range)ETagzContent-MD5Digest)rQ   rR   r   r   rw   rz   r   r8   r1   r2   r*   )	r6   r   r  rv   arrz   r   r   checksum_fieldr   r   r   r  i  s2   



r  c                   C   s<   t dtdd t dtdd t dtdd t dtdd d S )Nr   Tclobberr   r   r   )r   r   r   r   r   r   register  s   r[  c                  C   s,   ddl m}  td| dd td| dd d S )Nr   r   r   TrY  r   )fsspec.implementations.httpr   r   r\  r   r   r   
unregister  s   r^  )rz   )3rH   ra   loggingr  urllib.errorrV   urllib.parser   rF   r   r   r   r   ImportErrorModuleNotFoundErrorOSErrorfsspec.callbacksr   fsspec.registryr   fsspec.specr   r	   fsspec.utilsr
   r   r   r   cachingr   r$  r   r   	getLoggerrQ   error	HTTPErrorr   BytesIOr   r   rJ   r   r   rI  r   r   rT  r  r[  r^  r   r   r   r   <module>   sP    


NK   R 
.

,