B
    .(b|              -   @   s  d dl mZ d dlZdddgZedddd	g Zd
ddddddddddddddd	dddddddd d!d"d#d$d%d&ejd'ej d(ejd)ej d(d*d+d,d-d.d/d0d1d2d3d4d5d6g-Zd7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXejdYej d(dZd[g%Z	d\d]d^gZ
d]d_d^gZejd`edadb Zejd`e	dcdd Zejd`ededf Zejd`edgdh Zejjejdie
ejd`edjdk Zejjejdleejd`edmdn Zejjejdie
ejdleejd`edodp Zejjejdqe
ejdre
ejd`edsdt Zejjejdueejdveejd`edwdx ZdS )y    )unicode_literalsNzhttp://www.nytimes.com/2016/04/20/us/politics/new-york-primary-preview.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=a-lede-package-region&region=top-news&WT.nav=top-news&_r=0zwww.red-stars.comzmailto:foo.bar@baz.comzmailto:foo-bar@baz-co.comz$mailto:foo-bar@baz-co.com?subject=hizwww.google.com?q=googlez&http://foo.com/blah_(wikipedia)#cite-1zhttp://foo.com/blah_blahzhttp://BlahBlah.com/Blah_Blahzhttp://foo.com/blah_blah/z%http://www.example.com/wpstyle/?p=364z1https://www.example.com/foo/?bar=baz&inga=42&quuxz'http://userid:password@example.com:8080z(http://userid:password@example.com:8080/zhttp://userid@example.comzhttp://userid@example.com/zhttp://userid@example.com:8080zhttp://userid@example.com:8080/z"http://userid:password@example.comz#http://userid:password@example.com/zhttp://142.42.1.1/zhttp://142.42.1.1:8080/z+http://foo.com/blah_(wikipedia)_blah#cite-1u&   http://foo.com/unicode_(✪)_in_parensz'http://foo.com/(something)?after=parensz/http://code.google.com/events/#&product=browserzhttp://j.mpzftp://foo.bar/bazz,http://foo.bar/?q=Test%20URL-encoded%20stuffz2http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.comzhttp://1337.netzhttp://a.b-c.dezhttp://223.255.255.254zhttp://a.b--c.de/z+ssh://login@server.com:12345/repository.gitz&svn+ssh://user@ssh.yourdomain.com/pathz8chrome://extensions/?id=mhjfbmdgcfjbbpaeojofohoefgiehjai)Zmarksz3chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjaiz$http://foo.com/blah_blah_(wikipedia)z,http://foo.com/blah_blah_(wikipedia)_(again)zhttp://www.foo.co.ukzhttp://www.foo.co.uk/zhttp://www.foo.co.uk/blah/blahu   http://⌘.wsu   http://⌘.ws/u   http://☺.damowmow.com/u   http://✪df.ws/123u   http://➡.ws/䨹u   http://مثال.إختبارu   http://例子.测试u/   http://उदाहरण.परीक्षाzhttp://zhttp://.z	http://..z
http://../zhttp://?z	http://??z
http://??/zhttp://#z	http://##z
http://##/z)http://foo.bar?q=Spaces should be encodedz//z//az///az///z	http:///azrdar://1234zh://testzhttp:// shouldfail.comz:// should failzhttp://foo.bar/foo(bar)baz quuxzhttp://-error-.invalid/zhttp://a.b-.cozhttp://0.0.0.0zhttp://10.1.1.0zhttp://10.1.1.255zhttp://224.1.1.1zhttp://123.123.123zhttp://3628126748zhttp://.www.foo.bar/zhttp://.www.foo.bar./zhttp://10.1.1.1zNASDAQ:GOOGzhttp://-a.b.cozfoo.comzhttp://1.1.1.1.1zhttp://www.foo.bar./(">:urlc             C   s   |  |d k	std S )N)	url_matchAssertionError)en_tokenizerr    r   N/home/dcms/DCMS/lib/python3.7/site-packages/spacy/tests/tokenizer/test_urls.pytest_should_match{   s    r   c             C   s   |  |d kstd S )N)r   r	   )r
   r   r   r   r   test_should_not_match   s    r   c             C   s.   | |}t |dkst|d j|ks*td S )N   r   )lenr	   text)	tokenizerr   tokensr   r   r   !test_tokenizer_handles_simple_url   s    r   c             C   sZ   | d| d }t |dks t|d jdks2t|d j|ksDt|d jdksVtd S )Nr   )   r   r      )r   r	   r   )r   r   r   r   r   r   *test_tokenizer_handles_simple_surround_url   s
    r   prefixc             C   sD   | || }t |dkst|d j|ks.t|d j|ks@td S )Nr   r   r   )r   r	   r   )r   r   r   r   r   r   r   #test_tokenizer_handles_prefixed_url   s    r   suffixc             C   sD   | || }t |dkst|d j|ks.t|d j|ks@td S )Nr   r   r   )r   r	   r   )r   r   r   r   r   r   r   #test_tokenizer_handles_suffixed_url   s    r   c             C   sZ   | || | }t |dks t|d j|ks2t|d j|ksDt|d j|ksVtd S )Nr   r   r   r   )r   r	   r   )r   r   r   r   r   r   r   r   #test_tokenizer_handles_surround_url   s
    r   prefix1prefix2c             C   sZ   | || | }t |dks t|d j|ks2t|d j|ksDt|d j|ksVtd S )Nr   r   r   r   )r   r	   r   )r   r   r   r   r   r   r   r   %test_tokenizer_handles_two_prefix_url   s
    r    suffix1suffix2c             C   sZ   | || | }t |dks t|d j|ks2t|d j|ksDt|d j|ksVtd S )Nr   r   r   r   )r   r	   r   )r   r!   r"   r   r   r   r   r   %test_tokenizer_handles_two_suffix_url   s
    r#   )
__future__r   ZpytestZ
URLS_BASICZ	URLS_FULLparammarkZxfailZURLS_SHOULD_MATCHZURLS_SHOULD_NOT_MATCHPREFIXESZSUFFIXESZparametrizer   r   r   r   Zslowr   r   r   r    r#   r   r   r   r   <module>   s   

			