How to test whether a string contains one of the substrings in a list, in pandas?

vesszabo :

This question comes from this Stack Overflow question.

# Sample data: every value ends with a literal ')'.
s = pd.Series(['cat)','hat)','dog)','fog)','pet)'])
# Substrings to look for; each one also ends with ')'.
searchfor = ['og)', 'at)']
# Join the substrings with '|' (regex alternation) and filter the Series.
# str.contains() treats the pattern as a regular expression by default
# (regex=True), so the joined pattern 'og)|at)' is compiled as a regex and the
# unescaped ')' raises "unbalanced parenthesis at position 2".
s[s.str.contains('|'.join(searchfor))]

The error message is:

error                                     Traceback (most recent call last)
<ipython-input-208-b103d01401d9> in <module>
      1 searchfor = ['og)', 'at)']
----> 2 s[s.str.contains('|'.join(searchfor))]

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\strings.py in contains(self, pat, case, flags, na, regex)
   2521     def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
   2522         result = str_contains(self._parent, pat, case=case, flags=flags, na=na,
-> 2523                               regex=regex)
   2524         return self._wrap_result(result, fill_value=na)
   2525 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\strings.py in str_contains(arr, pat, case, flags, na, regex)
    297             flags |= re.IGNORECASE
    298 
--> 299         regex = re.compile(pat, flags=flags)
    300 
    301         if regex.groups > 0:

C:\ProgramData\Anaconda3\lib\re.py in compile(pattern, flags)
    232 def compile(pattern, flags=0):
    233     "Compile a regular expression pattern, returning a Pattern object."
--> 234     return _compile(pattern, flags)
    235 
    236 def purge():

C:\ProgramData\Anaconda3\lib\re.py in _compile(pattern, flags)
    284     if not sre_compile.isstring(pattern):
    285         raise TypeError("first argument must be string or compiled pattern")
--> 286     p = sre_compile.compile(pattern, flags)
    287     if not (flags & DEBUG):
    288         if len(_cache) >= _MAXCACHE:

C:\ProgramData\Anaconda3\lib\sre_compile.py in compile(p, flags)
    762     if isstring(p):
    763         pattern = p
--> 764         p = sre_parse.parse(p, flags)
    765     else:
    766         pattern = None

C:\ProgramData\Anaconda3\lib\sre_parse.py in parse(str, flags, pattern)
    942     if source.next is not None:
    943         assert source.next == ")"
--> 944         raise source.error("unbalanced parenthesis")
    945 
    946     if flags & SRE_FLAG_DEBUG:

error: unbalanced parenthesis at position 2

The expected output would be

0    cat)
1    hat)
2    dog)
3    fog)

Where is the unbalanced parenthesis? I couldn't work out the answer.

Quang Hoang :

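`)` is a special character in regular expressions (it closes a group), and `str.contains` interprets its pattern as a regex by default (`regex=True`). To match a literal `)`, you need to escape it: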

import re

# The substrings are plain text, not regex syntax, so keep them unescaped here
# and let re.escape() add the backslashes. (Writing 'og\)' by hand in a
# non-raw string literal relies on the invalid escape sequence '\)', which
# newer Python versions warn about; a raw string r'og\)' would also work.)
searchfor = ['og)', 'at)']
# Escape each substring, then join with '|' so the pattern matches any of them.
pattern = '|'.join(map(re.escape, searchfor))
s[s.str.contains(pattern)]

Output:

0    cat)
1    hat)
2    dog)
3    fog)
dtype: object

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=174010&siteId=1