Source code for plane.plane

"""
Plane class, which supports chained method calls.
"""

from plane.func import PATTERNS, compile_regex
from plane.pattern import ASCII_WORD, Token
from plane.punctuation import punc


class Plane:
    """
    Text container whose mutating methods return the instance itself, so
    calls can be chained.

    :class:`Plane.text` starts as an empty string and :class:`Plane.values`
    as an empty list when the instance is created; use :meth:`update` to
    load text.
    """

    def __init__(self):
        self._text = ""
        self._values = []

    @property
    def text(self):
        """Current text held by the instance (read-only property)."""
        return self._text

    @property
    def values(self):
        """Tokens collected by :meth:`extract` since the last :meth:`update`."""
        return self._values

    @staticmethod
    def _compiled(regex):
        """
        :param Regex regex: :class:`Regex`

        Return the compiled pattern for ``regex``: the entry stored in
        ``PATTERNS`` under ``regex.name`` when there is one, otherwise the
        result of ``compile_regex(regex)``.
        """
        compiled = PATTERNS.get(regex.name)
        if compiled is None:
            # Compile only on a miss: ``PATTERNS.get(name, compile_regex(regex))``
            # evaluates its default eagerly and would recompile on every call.
            # NOTE(review): assumes compile_regex has no side effects that
            # callers rely on -- confirm against plane.func.
            compiled = compile_regex(regex)
        return compiled

    def extract(self, regex, result=False):
        """
        :param Regex regex: :class:`Regex`
        :param bool result: if `True`, return result directly

        Extract tokens from the current text. Each match becomes a
        :class:`Token` built from the name and value of the match's last
        matched named group plus the start and end offsets of the whole match.

        :return: the list of tokens when ``result`` is `True`; otherwise the
            tokens are appended to :class:`Plane.values` and the instance is
            returned for chaining.
        """
        pattern = self._compiled(regex)
        values = []
        for mo in pattern.finditer(self._text):
            name = mo.lastgroup
            values.append(Token(name, mo.group(name), mo.start(), mo.end()))

        if result:
            return values
        self._values.extend(values)
        return self

    def replace(self, regex, repl=None, result=False):
        """
        :param Regex regex: :class:`Regex`
        :param str repl: replacement for regex; if set, it overrides the
            default ``regex.repl``
        :param bool result: if `True`, return result directly

        Replace matched :class:`regex` patterns with :class:`repl`.

        :return: the new text when ``result`` is `True`; otherwise the text is
            stored on the instance and the instance is returned for chaining.
        """
        repl = repl if repl is not None else regex.repl
        # Collect the untouched stretches and the replacements, then join once
        # instead of growing a string inside the loop.
        pieces, start = [], 0

        for t in self.extract(regex, result=True):
            pieces.append(self._text[start : t.start])
            pieces.append(repl)
            start = t.end
        pieces.append(self._text[start:])
        text = "".join(pieces)

        if result:
            return text
        self._text = text
        return self

    def update(self, text):
        """
        :param str text: text string.
        :raises TypeError: if ``text`` is not a :class:`str`

        Set `Plane.text` to ``text`` and reset `Plane.values` to an empty
        list.

        :return: the instance, for chaining.
        """
        if not isinstance(text, str):
            raise TypeError("Only support string.")

        self._text = text
        self._values = []
        return self

    def segment(self, regex=ASCII_WORD):
        """
        :param Regex regex: default regex is `ASCII_WORD`, which is meant to
            keep English words whole

        Segment the current text. Every match of ``regex`` is kept as one
        item; the text between matches is split into single characters, with
        space characters dropped (so, with the default regex, Chinese text is
        split per character while English words are kept).

        :return: list of string segments in text order.
        """
        pattern = self._compiled(regex)
        result, start = [], 0
        for mo in pattern.finditer(self._text):
            # Characters between the previous match and this one.
            result.extend(
                char for char in self._text[start : mo.start()] if char != " "
            )
            result.append(mo.group())
            start = mo.end()
        # Trailing characters after the last match.
        result.extend(char for char in self._text[start:] if char != " ")
        return result

    def remove_punctuation(self, repl=" ", punc=punc):
        """
        :param str repl: replacement passed to ``punc.remove`` for each
            punctuation
        :param punc: object providing ``remove(text, repl)``; defaults to the
            module-level ``punc``

        Remove all punctuations from the current text.

        :return: the instance, for chaining.
        """
        self._text = punc.remove(self._text, repl)
        return self

    def normalize_punctuation(self, punc=punc):
        """
        :param punc: object providing ``normalize(text)``; defaults to the
            module-level ``punc``

        Normalize punctuations to English punctuations.

        :return: the instance, for chaining.
        """
        self._text = punc.normalize(self._text)
        return self