def flat(l):
    """Lazily yield the leaf items of an arbitrarily nested list/tuple structure.

    Only ``list`` and ``tuple`` instances (including subclasses) are descended
    into; every other value, strings and dicts included, is yielded as-is.
    Items come out in left-to-right, depth-first order.

    Args:
        l: An iterable whose items may themselves be lists or tuples.

    Yields:
        Each non-list, non-tuple item found at any nesting depth.
    """
    # An explicit stack of iterators replaces recursion, so very deep nesting
    # cannot exhaust the interpreter's recursion limit.
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, (list, tuple)):
                # Descend: pause the current iterator and walk the child first.
                pending.append(iter(item))
                break
            yield item
        else:
            # Current level is exhausted; resume its parent where it left off.
            pending.pop()
def mkdf_tojoin(df):
    """Return a new DataFrame equal to ``df`` plus a trailing ``tmpid`` column.

    ``tmpid`` is a ``LongType`` column holding the index that
    ``RDD.zipWithIndex`` assigns to each row, which makes it usable as a
    join key between DataFrames that share the same row order.

    Args:
        df: The Spark DataFrame to extend.

    Returns:
        A DataFrame built with the module-level session ``ss`` whose schema is
        ``df``'s schema followed by ``tmpid``.

    Note:
        Per the PySpark documentation, ``zipWithIndex`` launches a Spark job
        when the RDD has more than one partition.
    """
    import copy

    # StructType.add appends in place and returns the same object, so extend a
    # copy; otherwise the schema object handed out by ``df`` would be altered
    # (and a second call on the same ``df`` would add ``tmpid`` twice).
    schema = copy.deepcopy(df.schema).add(StructField("tmpid", LongType()))

    # zipWithIndex yields (row, index) pairs. Append the index to the row's
    # top-level fields only: a deep flatten would also unpack array/struct
    # column values and the rows would no longer match the schema.
    indexed = df.rdd.zipWithIndex().map(lambda pair: [*pair[0], pair[1]])

    # NOTE(review): ``ss`` is not defined in this block; presumably the active
    # SparkSession defined elsewhere in the module. Confirm.
    return ss.createDataFrame(indexed, schema)
# Reference:
# https://blog.csdn.net/weixin_43668299/article/details/103269810