o
    Jjg>                     @   s   U d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZmZmZ er1d dlmZmZmZmZ h dZe
e ed< G dd deZG d	d
 d
ZdS )    N)platform)
TYPE_CHECKINGAnyDictIterableListOptionalSetTuple	TypedDictUnion)Browser
CDPSessionPagesync_playwright>   ::markerbrsvgbodyheadhtmlmetapathstyletitleiframescriptblack_listed_elementsc                   @   sn   e Zd ZU dZeed< eed< ee ed< ee ed< ee ed< e	ed< eed< eed	< eed
< eed< dS )ElementInViewPortzIA typed dictionary containing information about elements in the viewport.
node_indexbackend_node_id	node_name
node_value	node_metais_clickableorigin_xorigin_ycenter_xcenter_yN)
__name__
__module____qualname____doc__str__annotations__intr   r   bool r1   r1   W/var/www/html/zoom/venv/lib/python3.10/site-packages/langchain/chains/natbot/crawler.pyr   $   s   
 r   c                   @   s   e Zd ZdZdddZdeddfddZd	eddfd
dZdeee	f ddfddZ
deee	f deddfddZdddZdee fddZdS )Crawlera   A crawler for web pages.

    **Security Note**: This is an implementation of a crawler that uses a browser via
        Playwright.

        This crawler can be used to load arbitrary webpages INCLUDING content
        from the local file system.

        Control access to who can submit crawling requests and what network access
        the crawler has.

        Make sure to scope permissions to the minimal permissions necessary for
        the application.

        See https://python.langchain.com/docs/security for more information.
    returnNc                 C   sh   zddl m} W n ty   tdw |  jjdd| _| j | _| j	ddd |  |  d S )	Nr   )r   z\Could not import playwright python package. Please install it with `pip install playwright`.F)headlessi   i8  )widthheight)
playwright.sync_apir   ImportErrorstartchromiumlaunchbrowsernew_pagepageset_viewport_size)selfr   r1   r1   r2   __init__E   s   zCrawler.__init__urlc                 C   s:   | j jd|v r	|nd| d | j j| j | _i | _d S )Nz://zhttp://)rC   )r?   gotocontextnew_cdp_sessionclientpage_element_buffer)rA   rC   r1   r1   r2   
go_to_pageU   s   
zCrawler.go_to_page	directionc                 C   s4   |dkr| j d d S |dkr| j d d S d S )Nupz(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;downz(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;)r?   evaluate)rA   rJ   r1   r1   r2   scrollZ   s   zCrawler.scrollidc                 C   sT   d}| j | | jt|}|r$|d }|d }| j j|| d S td d S )Nz
		links = document.getElementsByTagName("a");
		for (var i = 0; i < links.length; i++) {
			links[i].removeAttribute("target");
		}
		r'   r(   zCould not find element)r?   rM   rH   getr/   mouseclickprint)rA   rO   jselementxyr1   r1   r2   rR   d   s   zCrawler.clicktextc                 C   s   |  | | jj| d S )N)rR   r?   keyboardtype)rA   rO   rX   r1   r1   r2   rZ   w   s   
zCrawler.typec                 C   s   | j jd d S )NEnter)r?   rY   press)rA   r1   r1   r2   enter{   s   zCrawler.enterc           K         s  | j }| j}t }g }|d}tdkr|dkrd}|d}|d}|d}|d}	|| }
||	 }d}d}|d	d	d
t|dt|d | j	dg ddd}|d |d d	 }|d }|d }|d }|d }|d |d t
|d d }|d }|d }|d }|d }|d }|d }d	}i }g }ddi}ddi}d tt d!tt d"tfd#d$} dtttf d%tt d"tttf ffd&d'}!d(tttttt f f d)td*td tt d+td"tttt f f fd,d- tD ]l\}"}#|" }$|#  }% |d.|"|%|$\}&}' |d/|"|%|$\}(})z||"}W n   Y q|%tv r0q|| \}*}+},}-|*| }*|+| }+|,| },|-| }-|*}.|+}/|*|, }0|+|- }1|.|
k of|0|kof|/|k of|1|k}2|2skqg }3|!||" g d0}4|&pz|(}5|5sd n
|&rt|'nt|)}6|5sd n|t|6g }7|%d1kr|5r|7r||"  }8|8d2ks|8d3krq|7d4|8d5 n:|%d6kr|4d4d7ks|%d/krd/}%|4d4d  |4D ]}9|5r|7r|7d8|9|4|9 d9 q|3|4|9  qd }:||" d	kr||"  }:|:d2krqn&|%d6kr3|"|v r3|:d u r3||"};||; }<|;d	kr3|<d	kr3|< }:|5rA|%d.krA|%d/krAq|t|"||" |%|:|3|"|v t|*t|+t|*|,d  t|+|-d  d:
 qg }=d	}>|D ]}?|?d;}@|?d }%|?d<}:|?d=}A|?d>}B|:r|: d?nd@}Cd@}D|@|v r||@ D ].}E|Ed4}F|Ed}G|Fd8kr|Br|EdA}H|B|H dB|G dC q|C|G d?7 }Cq|Brd?|B}Id?|I }D|Cd@kr|C  }C| |%|A}J|Jd/ks|Dd@kr|JdDkr|Jd6kr|JdEkr|JdFkr|C d@krqn|?||>< |Cd@kr.|=dG|J dH|> |D dI|C dJ|J dI
 n|=dG|J dH|> |D dK |>d7 }>qntdLt |  |=S )MNzwindow.devicePixelRatiodarwin      zwindow.pageYOffsetzwindow.pageXOffsetzwindow.screen.widthzwindow.screen.heightr   z[scrollbar {:0.2f}-{:0.2f}%])rV   rW   rX   zDOMSnapshot.captureSnapshotT)computedStylesincludeDOMRectsincludePaintOrderstrings	documentsnodesbackendNodeId
attributes	nodeValueparentIndexnodeNameisClickableindex
inputValuevaluelayout	nodeIndexboundsz-1FNr!   has_click_handlerr4   c                 S   s8   | dkrdS | dkrdS | dkrdS | dks|rdS dS )NalinkinputimgbuttonrX   r1   )r!   rt   r1   r1   r2   convert_name   s   z#Crawler.crawl.<locals>.convert_namekeysc                    sd   i }t t| fd  D ]$\}}|dk rq | } | }||v r/|||< || |s/|  S q|S )Nr`   r   )zipiterremove)rh   r{   values	key_indexvalue_indexkeyro   )rd   r1   r2   find_attributes   s   
z&Crawler.crawl.<locals>.find_attributes	hash_treetagnode_id	parent_idc                    sx   t |}|| vr|   }| } | |||| | | \}}	||kr+d|f}
n	|r2d|	f}
nd}
|
| t |< |
S )NTrs   )r-   lower)r   r   r   r!   r   parent_id_strparent_namegrand_parent_idis_parent_desc_anchor	anchor_idro   add_to_hash_tree
node_namesparentrd   r1   r2   r      s    


z'Crawler.crawl.<locals>.add_to_hash_treeru   ry   )rZ   placeholderz
aria-labelr   altz#text|u   •rZ   )rZ   ro   rw   submit	attribute)rZ   r   ro   )
r   r    r!   r"   r#   r$   r%   r&   r'   r(   r   r"   r$   r#     r   z=""rv   rx   textarea<z id=>z</z/>zParsing time: {:0.2f} seconds)r?   rH   timerM   r   appendformatroundrG   sendsetr   r-   r0   r   r/   r   r   r
   	enumerater   rm   r   
setdefaultrP   popjoinstriprS   )KrA   r?   rH   r:   page_state_as_textdevice_pixel_ratiowin_upper_boundwin_left_bound	win_width
win_heightwin_right_boundwin_lower_boundpercentage_progress_startpercentage_progress_endtreedocumentrf   r    rh   r"   r$   input_valueinput_value_indexinput_value_valuesrp   layout_node_indexrr   cursorchild_nodeselements_in_view_portanchor_ancestrybutton_ancestryrz   r   rm   node_name_indexnode_parentr!   is_ancestor_of_anchorr   is_ancestor_of_button	button_idrV   rW   r6   r7   elem_left_boundelem_top_boundelem_right_boundelem_lower_boundpartially_is_in_viewport	meta_dataelement_attributesancestor_exceptionancestor_node_keyancestor_noderX   r   element_node_valuenode_input_text_index
text_indexelements_of_interest
id_counterrU   r   node_is_clickablenode_meta_data
inner_textr   child
entry_typeentry_value	entry_keymeta_stringconverted_node_namer1   r   r2   crawl~   s  









#




	
























 zCrawler.crawl)r4   N)r)   r*   r+   r,   rB   r-   rI   rN   r   r/   rR   rZ   r]   r   r   r1   r1   r1   r2   r3   3   s    


r3   )r   sysr   typingr   r   r   r   r   r   r	   r
   r   r   r8   r   r   r   r   r   r-   r.   r   r3   r1   r1   r1   r2   <module>   s   
0