관리-도구

편집 파일: defchararray.py

"""
This module contains a set of functions for vectorized string
operations and methods.

.. note::
   The `chararray` class exists for backwards compatibility with
   Numarray; it is not recommended for new development. Starting from numpy
   1.4, if one needs arrays of strings, it is recommended to use arrays of
   `dtype` `object_`, `string_` or `unicode_`, and use the free functions
   in the `numpy.char` module for fast vectorized string operations.

Some methods will only be available if the corresponding string method is
available in your version of Python.

The preferred alias for `defchararray` is `numpy.char`.

"""
from __future__ import division, absolute_import, print_function

import sys
from .numerictypes import string_, unicode_, integer, object_, bool_, character
from .numeric import ndarray, compare_chararrays
from .numeric import array as narray
from numpy.core.multiarray import _vec_string
from numpy.compat import asbytes, long
import numpy

# Names exported by a star-import of this module (``from ... import *``).
# NOTE(review): most of these names are defined outside the part of the
# file visible here -- keep the list in sync with those definitions.
__all__ = [
    'chararray', 'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    'array', 'asarray'
    ]

# NOTE(review): the purpose of `_globalvar` is not visible in this part of
# the file; it is kept exactly as a module-level integer initialised to 0.
_globalvar = 0

# Version-independent aliases for the text and byte string types, so the
# rest of the module can refer to `_unicode` / `_bytes` on Python 2 and 3.
if sys.version_info[0] < 3:
    # Python 2: `unicode` is the text type and `str` holds bytes.
    _unicode = unicode
    _bytes = str
else:
    # Python 3: `str` is the text type and `bytes` holds bytes.
    _unicode = str
    _bytes = bytes

# Private alias for the builtin `len`.
# NOTE(review): presumably kept so the builtin stays reachable if `len` is
# shadowed elsewhere in the module -- confirm against the rest of the file.
_len = len

def _use_unicode(*args):
    """
    Helper function for determining the output type of some string
    operations.

    Returns `unicode_` as soon as one operand either is a unicode string
    or converts (through `numpy.asarray`) to an array whose dtype type is
    a subclass of `unicode_`.  Otherwise -- including when called without
    arguments -- `string_` is returned.
    """
    def _is_unicode(operand):
        # Cheap scalar check first; the array conversion only happens when
        # the operand is not itself a unicode string.
        if isinstance(operand, _unicode):
            return True
        return issubclass(numpy.asarray(operand).dtype.type, unicode_)

    # `any` stops at the first unicode operand, so later operands are not
    # converted, exactly like an early return from an explicit loop.
    if any(_is_unicode(operand) for operand in args):
        return unicode_
    return string_

def _to_string_or_unicode_array(result):
    """
    Helper function to turn a result back into a string or unicode array
    after an object array had to be used as an intermediary.

    The array is first converted to (nested) Python lists with
    ``tolist()`` and then passed to `numpy.asarray` without an explicit
    dtype, so the dtype of the returned array is inferred from the list
    contents.
    """
    as_lists = result.tolist()
    return numpy.asarray(as_lists)

def _clean_args(*args):
    """
    Helper function for delegating arguments to Python string
    functions.

    Many of the Python string operations that have optional arguments
    do not use 'None' to indicate a default value.  In these cases,
    the first `None` argument and everything after it must be dropped.

    Returns a new list holding the arguments that precede the first
    `None` (all of them when no argument is `None`).
    """
    for position, value in enumerate(args):
        if value is None:
            # Truncate at the first `None`; later arguments are ignored
            # even if they are not `None` themselves.
            return list(args[:position])
    return list(args)

def _get_num_chars(a):
    """
    Helper function that returns the number of characters per field in
    a string or unicode array.

    For an array whose dtype type is a subclass of `unicode_` the
    ``itemsize`` is divided by 4; for any other array ``itemsize`` is
    returned unchanged.
    """
    is_unicode = issubclass(a.dtype.type, unicode_)
    return a.itemsize // 4 if is_unicode else a.itemsize

def equal(x1, x2):
    """
    Return (x1 == x2) element-wise.

Unlike `numpy.equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.