import math

import pandas as pd

import csv

from tqdm import tqdm

def mask(x: pd.Series) -> pd.Series:
    """Overwrite a row's ``cls1``/``cls2`` fields from its ``cls4`` field.

    Written to be used as ``DataFrame.apply(mask, axis=1)``. The row is
    modified in place and also returned.

    Rules, in order:

    * ``cls1`` is falsy (e.g. an empty string) -> row returned unchanged.
    * ``cls4`` is missing (NaN/None)           -> row returned unchanged.
    * ``cls4`` contains no space character     -> ``cls1`` becomes ``cls4``.
    * otherwise ``cls4`` is split on whitespace: the first token goes to
      ``cls1`` and the second to ``cls2``; any further tokens are ignored.

    NOTE(review): a NaN ``cls1`` is truthy, so rows whose ``cls1`` is missing
    are still processed; only a genuinely falsy value skips the row. Confirm
    this is the intended condition.

    Args:
        x: A row exposing ``cls1``, ``cls2`` and ``cls4`` as attributes.

    Returns:
        The same row object, possibly with ``cls1``/``cls2`` overwritten.
    """
    if not x.cls1:
        return x

    label = x.cls4
    if pd.isna(label):
        return x

    if ' ' not in label:
        x.cls1 = label
        return x

    # ``split()`` discards leading/trailing whitespace, so a value such as
    # "foo " or "   " yields fewer than two tokens even though it contains a
    # space. Only assign the tokens that actually exist instead of raising
    # IndexError.
    words = label.split()
    if words:
        x.cls1 = words[0]
    if len(words) > 1:
        x.cls2 = words[1]
    return x
# Both inputs are tab-separated UTF-8 text. Every column is loaded as a string
# and quote characters are treated as ordinary data (no quoting rules).
_TSV_READ_OPTIONS = dict(sep='\t', encoding='utf8', dtype=str, quoting=csv.QUOTE_NONE)

df = pd.read_csv("./2000qV15.txt", **_TSV_READ_OPTIONS)
# Disabled filtering steps kept from the original script:
# df=df.dropna(subset=['url'], how='any').reset_index(drop=True)
# df=df[df['obj_url'].isin(["-"])]['obj_url'].count()
df1 = pd.read_csv("./2000qV15.res.hangye", **_TSV_READ_OPTIONS)

# Left-join the second table onto the first by "query". Columns of df1 whose
# names collide with df's receive the "_DROP" suffix; df's keep their names.
df = pd.merge(df, df1, how="left", on=["query"], suffixes=('', '_DROP'))

# Rewrite cls1/cls2 row by row, then persist the result as a tab-separated file.
df = df.apply(mask, axis=1)
df.to_csv(
    "./titi.txt",
    index=False,
    sep='\t',
    encoding='utf8',
    quoting=csv.QUOTE_NONE,
)
# Disabled: remove the helper column after it has been consumed.
# df=df.drop(['cls4'], axis=1)

print(df)

# Topic: using pandas `apply` (Python)