B
    `,*                 @   sn   d dl Z d dlmZ G dd deZG dd deZdd Zd	d
 Zdd Z	edddgZ
G dd deZdS )    N)
namedtuplec               @   st   e Zd ZdZdddZedd Zedd Zd	d
 Zdd Z	eee	Z
dd Zdd Zdd Zdd Zdd ZdS )AlignedSenta#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    Nc             C   s:   || _ || _|d kr tg | _nt|tks0t|| _d S )N)_words_mots	Alignment	alignmenttypeAssertionError)selfwordsmotsr    r   A/home/dcms/DCMS/lib/python3.7/site-packages/nltk/translate/api.py__init__/   s    zAlignedSent.__init__c             C   s   | j S )N)r   )r
   r   r   r   r   8   s    zAlignedSent.wordsc             C   s   | j S )N)r   )r
   r   r   r   r   <   s    zAlignedSent.motsc             C   s   | j S )N)
_alignment)r
   r   r   r   _get_alignment@   s    zAlignedSent._get_alignmentc             C   s"   t t| jt| j| || _d S )N)_check_alignmentlenr   r   r   )r
   r   r   r   r   _set_alignmentC   s    zAlignedSent._set_alignmentc             C   sD   dd dd | jD  }dd dd | jD  }d||| jf S )z_
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        z[%s]z, c             s   s   | ]}d | V  qdS )z'%s'Nr   ).0wr   r   r   	<genexpr>O   s    z'AlignedSent.__repr__.<locals>.<genexpr>c             s   s   | ]}d | V  qdS )z'%s'Nr   )r   r   r   r   r   r   P   s    zAlignedSent(%s, %s, %r))joinr   r   r   )r
   r   r   r   r   r   __repr__I   s    zAlignedSent.__repr__c             C   s<  d}|d7 }x| j D ]}|d||f 7 }qW x| jD ]}|d||f 7 }q4W x.| jD ]$\}}|d| j | | j| f 7 }qTW x:tt| j d D ]$}|d| j | | j |d  f 7 }qW x:tt| jd D ]$}|d| j| | j|d  f 7 }qW |d	d
dd | j D  7 }|d	d
dd | jD  7 }|d7 }|S )z<
        Dot representation of the aligned sentence
        zgraph align {
znode[shape=plaintext]
z"%s_source" [label="%s"] 
z"%s_target" [label="%s"] 
z"%s_source" -- "%s_target" 
   z)"%s_source" -- "%s_source" [style=invis]
z)"%s_target" -- "%s_target" [style=invis]
z{rank = same; %s}
 c             s   s   | ]}d | V  qdS )z"%s_source"Nr   )r   r   r   r   r   r   u   s    z&AlignedSent._to_dot.<locals>.<genexpr>c             s   s   | ]}d | V  qdS )z"%s_target"Nr   )r   r   r   r   r   r   v   s    })r   r   r   ranger   r   )r
   sr   uvir   r   r   _to_dotT   s(     zAlignedSent._to_dotc             C   sl   |   d}d}y$tjdd| gtjtjtjd}W n tk
rR   tdY nX ||\}}|dS )zR
        Ipython magic : show SVG representation of this ``AlignedSent``.
        utf8svgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz package)	r"   encode
subprocessPopenPIPEOSError	Exceptioncommunicatedecode)r
   Z
dot_stringZoutput_formatprocessouterrr   r   r   
_repr_svg_|   s    
zAlignedSent._repr_svg_c             C   s<   d | jdd d }d | jdd d }d||f S )zn
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        r   N   z...z<AlignedSent: '%s' -> '%s'>)r   r   r   )r
   sourcetargetr   r   r   __str__   s    zAlignedSent.__str__c             C   s   t | j| j| j S )zm
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        )r   r   r   r   invert)r
   r   r   r   r9      s    zAlignedSent.invert)N)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r"   r4   r8   r9   r   r   r   r   r      s   
	
(
r   c               @   sV   e Zd ZdZdd Zedd Zdd Zdd	 ZdddZ	dd Z
dd Zdd Zd
S )r   ac  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c             C   s:   t | |}|t g kr*tdd |D nd|_d |_|S )Nc             s   s   | ]}|d  V  qdS )r   Nr   )r   pr   r   r   r      s    z$Alignment.__new__.<locals>.<genexpr>r   )	frozenset__new__max_len_index)clspairsr
   r   r   r   rA      s    $zAlignment.__new__c             C   s   t dd | D S )a  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        c             S   s   g | ]}t |qS r   )
_giza2pair)r   ar   r   r   
<listcomp>   s    z(Alignment.fromstring.<locals>.<listcomp>)r   split)rE   r   r   r   r   
fromstring   s    zAlignment.fromstringc             C   s   | j s|   | j |S )zN
        Look up the alignments that map from a given index or slice.
        )rD   _build_index__getitem__)r
   keyr   r   r   rM      s    zAlignment.__getitem__c             C   s   t dd | D S )zI
        Return an Alignment object, being the inverted mapping.
        c             s   s*   | ]"}|d  |d f|dd  V  qdS )r   r      Nr   )r   r?   r   r   r   r      s    z#Alignment.invert.<locals>.<genexpr>)r   )r
   r   r   r   r9      s    zAlignment.invertNc             C   sZ   t  }| js|   |s*ttt| j}x&|D ]}|dd | j| D  q0W t|S )z
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c             s   s   | ]\}}|V  qd S )Nr   )r   _fr   r   r   r      s    z"Alignment.range.<locals>.<genexpr>)setrD   rL   listr   r   updatesorted)r
   Z	positionsimager?   r   r   r   r      s    
zAlignment.rangec             C   s   dt |  S )zM
        Produce a Giza-formatted string representing the alignment.
        zAlignment(%r))rU   )r
   r   r   r   r      s    zAlignment.__repr__c             C   s   d dd t| D S )zM
        Produce a Giza-formatted string representing the alignment.
        r   c             s   s   | ]}d |dd  V  qdS )z%d-%dNrO   r   )r   r?   r   r   r   r      s    z$Alignment.__str__.<locals>.<genexpr>)r   rU   )r
   r   r   r   r8      s    zAlignment.__str__c             C   s@   dd t | jd D | _x | D ]}| j|d  | q W dS )z
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        c             S   s   g | ]}g qS r   r   )r   rP   r   r   r   rI      s    z*Alignment._build_index.<locals>.<listcomp>r   r   N)r   rC   rD   append)r
   r?   r   r   r   rL      s    
zAlignment._build_index)N)r:   r;   r<   r=   rA   classmethodrK   rM   r9   r   r   r8   rL   r   r   r   r   r      s   
r   c             C   s   |  d\}}t|t|fS )N-)rJ   int)pair_stringr!   jr   r   r   rG     s    rG   c             C   s    |  d\}}}t|t|fS )NrY   )rJ   rZ   )r[   r!   r\   r?   r   r   r   _naacl2pair	  s    r]   c                sP   t |tksttfdd|D s.tdt fdd|D sLtddS )ab  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c             3   s*   | ]"}d |d    ko k n  V  qdS )r   Nr   )r   pair)	num_wordsr   r   r     s    z#_check_alignment.<locals>.<genexpr>z&Alignment is outside boundary of wordsc             3   s6   | ].}|d  dkp,d|d    ko( k n  V  qdS )r   Nr   r   )r   r^   )num_motsr   r   r     s    z%Alignment is outside boundary of motsN)r   r   r	   all
IndexError)r_   r`   r   r   )r`   r_   r   r     s
    r   PhraseTableEntry
trg_phraselog_probc               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )PhraseTablezs
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c             C   s   t  | _d S )N)dictsrc_phrases)r
   r   r   r   r   ,  s    zPhraseTable.__init__c             C   s
   | j | S )a  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        )rh   )r
   
src_phraser   r   r   translations_for/  s    zPhraseTable.translations_forc             C   sL   t ||d}|| jkr g | j|< | j| | | j| jdd dd dS )z
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        )rd   re   c             S   s   | j S )N)re   )er   r   r   <lambda>K      z!PhraseTable.add.<locals>.<lambda>T)rN   reverseN)rc   rh   rW   sort)r
   ri   rd   re   entryr   r   r   add>  s
    	

zPhraseTable.addc             C   s
   || j kS )N)rh   )r
   ri   r   r   r   __contains__M  s    zPhraseTable.__contains__N)r:   r;   r<   r=   r   rj   rq   rr   r   r   r   r   rf   &  s
   rf   )r*   collectionsr   objectr   r@   r   rG   r]   r   rc   rf   r   r   r   r   <module>   s    b