如何拆分熊猫字符串以提取中间名?

您可以尝试格式文件:如果您在第二列上应用了引号... [csvfile] 包含 AAAAA,"B,BB",CCC


Create Table csvfile

(

f1 VarChar(10),

f2 VarChar(10),

f3 VarChar(10)

)


BULK INSERT csvfile   

     FROM 'c:\downloads\sample.csv'   

     WITH (FORMATFILE = 'c:\downloads\sample.fmt'); 

样本文件


14.0

3

1       SQLCHAR             0       10      ",\""    1     f1       SQL_Latin1_General_CP1_CI_AS

2       SQLCHAR             0       10      "\","    2     f2       SQL_Latin1_General_CP1_CI_AS

3       SQLCHAR             0       10      "\r\n"   3     f3       SQL_Latin1_General_CP1_CI_AS

分享


翻翻过去那场雪
浏览 233回答 3
3回答

智慧大石

一个str.extract电话将在这里工作:p = r'^(?P<Last_Name>.*), (?P<First_Name>\S+)\b\s*(?P<Middle_Name>.*)'&nbsp;u = df.loc[df.Type == "I", 'Complete_Name'].str.extract(p)pd.concat([df, u], axis=1).fillna('')&nbsp; &nbsp;ID&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Complete_Name Type&nbsp; &nbsp; &nbsp;Last_Name First_Name&nbsp; &nbsp;Middle_Name0&nbsp; &nbsp;1&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; JERRY, Ben&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;JERRY&nbsp; &nbsp; &nbsp; &nbsp; Ben&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;1&nbsp; &nbsp;2&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; VON HELSINKI, Olga&nbsp; &nbsp; I&nbsp; VON HELSINKI&nbsp; &nbsp; &nbsp; &nbsp;Olga&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;2&nbsp; &nbsp;3&nbsp; JENSEN, James Goodboy Dean&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp; JENSEN&nbsp; &nbsp; &nbsp; James&nbsp; Goodboy Dean3&nbsp; &nbsp;4&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;THE COMPANY&nbsp; &nbsp; C&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;4&nbsp; &nbsp;5&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;CRUZ, Juan S. de la&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; CRUZ&nbsp; &nbsp; &nbsp; &nbsp;Juan&nbsp; &nbsp; &nbsp; S. de la正则表达式分解^&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Start-of-line(?P<Last_Name>&nbsp; &nbsp;# First named capture group - Last Name&nbsp; &nbsp; .*&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Match anything until...),&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # ...we see a comma\s&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# whitespace&nbsp;(?P<First_Name>&nbsp; # Second capture group - First Name&nbsp; &nbsp; \S+&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Match all non-whitespace characters)\b&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Word boundary&nbsp;\s*&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Optional whitespace chars (mostly housekeeping)&nbsp;(?P<Middle_Name> # Third capture group - Zero of more middle names&nbsp;&nbsp; &nbsp; .*&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Match everything till the end of string)

有只小跳蛙

我认为你可以这样做:# take the complete_name column and split it multiple timesdf2 = (df.loc[df['Type'].eq('I'),'Complete_Name'].str&nbsp; &nbsp; &nbsp; &nbsp;.split(',', expand=True)&nbsp; &nbsp; &nbsp; &nbsp;.fillna(''))# remove extra spaces&nbsp;for x in df2.columns:&nbsp; &nbsp; df2[x] = [x.strip() for x in df2[x]]# split the name on first space and join itdf2 = pd.concat([df2[0],df2[1].str.split(' ',1, expand=True)], axis=1)df2.columns = ['last','first','middle']# join the data framesdf = pd.concat([df[['ID','Complete_Name']], df2], axis=1)# rearrange columns - not necessary thoughdf = df[['ID','Complete_Name','first','middle','last']]# remove none valuesdf = df.replace([None], '')&nbsp; &nbsp;ID&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Complete_Name Type&nbsp; first&nbsp; &nbsp; &nbsp; &nbsp; middle&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; last0&nbsp; &nbsp;1&nbsp; &nbsp;JERRY, Ben&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; I&nbsp; &nbsp; Ben&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;JERRY1&nbsp; &nbsp;2&nbsp; &nbsp;VON HELSINKI, Olga&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; I&nbsp; &nbsp;Olga&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; VON HELSINKI2&nbsp; &nbsp;3&nbsp; &nbsp;JENSEN, James Goodboy Dean&nbsp; &nbsp; &nbsp; I&nbsp; James&nbsp; Goodboy Dean&nbsp; &nbsp; &nbsp; &nbsp; JENSEN3&nbsp; &nbsp;4&nbsp; &nbsp;THE COMPANY&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;C&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;4&nbsp; &nbsp;5&nbsp; &nbsp;CRUZ, Juan S. de la&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;I&nbsp; &nbsp;Juan&nbsp; &nbsp; &nbsp; S. de la&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; CRUZ

MM们

这是使用一些简单的 lambda 功能的另一个答案。import numpy as npimport pandas as pd""" Create data and data frame """info_dict = {&nbsp; &nbsp; 'ID': [1,2,3,4,5,],&nbsp; &nbsp; 'Complete_Name':[&nbsp; &nbsp; &nbsp; &nbsp; 'JERRY, Ben',&nbsp; &nbsp; &nbsp; &nbsp; 'VON HELSINKI, Olga',&nbsp; &nbsp; &nbsp; &nbsp; 'JENSEN, James Goodboy Dean',&nbsp; &nbsp; &nbsp; &nbsp; 'THE COMPANY',&nbsp; &nbsp; &nbsp; &nbsp; 'CRUZ, Juan S. de la',&nbsp; &nbsp; &nbsp; &nbsp; ],&nbsp; &nbsp; 'Type':['I','I','I','C','I',],&nbsp; &nbsp; }data = pd.DataFrame(info_dict, columns = info_dict.keys())""" List of columns to add """name_cols = [&nbsp; &nbsp; 'First Name',&nbsp; &nbsp; 'Middle Name',&nbsp; &nbsp; 'Last Name',&nbsp; &nbsp; ]"""Use partition() to separate first and middle names into Pandas series.Note: data[data['Type'] == 'I']['Complete_Name'] will allow us to target only thevalues that we want."""NO_LAST_NAMES = data[data['Type'] == 'I']['Complete_Name'].apply(lambda x: str(x).partition(',')[2].strip())LAST_NAMES = data[data['Type'] == 'I']['Complete_Name'].apply(lambda x: str(x).partition(',')[0].strip())# We can use index positions to quickly add columns to the dataframe.# The partition() function will keep the delimited value in the 1 index, so we'll use# the 0 and 2 index positions for first and middle names.data[name_cols[0]] = NO_LAST_NAMES.str.partition(' ')[0]data[name_cols[1]] = NO_LAST_NAMES.str.partition(' ')[2]# Finally, we'll add our Last Names columndata[name_cols[2]] = LAST_NAMES# Optional: We can replace all blank values with numpy.NaN values using regular expressions.data = data.replace(r'^$', np.NaN, regex=True)然后你应该得到这样的结果:&nbsp; &nbsp;ID&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Complete_Name Type First Name&nbsp; &nbsp;Middle Name&nbsp; &nbsp; &nbsp;Last Name0&nbsp; &nbsp;1&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; JERRY, Ben&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp; Ben&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;NaN&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;JERRY1&nbsp; &nbsp;2&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; VON HELSINKI, Olga&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp;Olga&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;NaN&nbsp; VON HELSINKI2&nbsp; &nbsp;3&nbsp; JENSEN, James Goodboy Dean&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; James&nbsp; Goodboy Dean&nbsp; &nbsp; &nbsp; &nbsp; JENSEN3&nbsp; &nbsp;4&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;THE COMPANY&nbsp; &nbsp; C&nbsp; &nbsp; &nbsp; &nbsp; NaN&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;NaN&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;NaN4&nbsp; &nbsp;5&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;CRUZ, Juan S. de la&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp;Juan&nbsp; &nbsp; &nbsp; S. de la&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; CRUZ或者,用空字符串替换 NaN 值:data = data.replace(np.NaN, r'', regex=False)然后你有:&nbsp; &nbsp;ID&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Complete_Name Type First Name&nbsp; &nbsp;Middle Name&nbsp; &nbsp; &nbsp;Last Name0&nbsp; &nbsp;1&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; JERRY, Ben&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp; Ben&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;JERRY1&nbsp; &nbsp;2&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; VON HELSINKI, Olga&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp;Olga&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; VON HELSINKI2&nbsp; &nbsp;3&nbsp; JENSEN, James Goodboy Dean&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; James&nbsp; Goodboy Dean&nbsp; &nbsp; &nbsp; &nbsp; JENSEN3&nbsp; &nbsp;4&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;THE COMPANY&nbsp; &nbsp; C&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;4&nbsp; &nbsp;5&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;CRUZ, Juan S. de la&nbsp; &nbsp; I&nbsp; &nbsp; &nbsp; &nbsp;Juan&nbsp; &nbsp; &nbsp; S. de la&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; CRUZ
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python