4 Group emails
Given 1 million email list: list 1: [email protected], [email protected] list 2: [email protected], [email protected] list 3: [email protected] list 4: [email protected] ... Combine lists with identical emails, and output tuples: (list 1, list 2, list 4) ([email protected], [email protected], [email protected]) (list 3) ([email protected]) 如何用并查集解决
思路:直接union find扫一遍。把所有email都带一个parent。然后再扫一遍email。根据他们的parent来cosntruct一个结果就行了。parent想同的email就是在一个group的
import collections
class Solution(object):
def combineEmails(self, email_lists):
dic = {}
u = Union()
for l in email_lists:
for e in l:
if e not in u.dic:
u.add(e)
u.unite(e, l[0])
# construct output based on parents
tmp = collections.defaultdict(list)
for k in u.dic:
tmp[u.find(k)].append(k)
res = [tmp[i] for i in tmp]
return res
class Union(object):
def __init__(self):
self.dic = {}
self.sz = {}
def find(self, i):
if self.dic[i]!=self.dic[self.dic[i]]:
self.dic[i]= self.dic[self.dic[i]]
return self.dic[i]
def add(self, i):
self.dic[i] = i
self.sz[i] = 1
def unite(self, i, j):
if self.find(i)!=self.find(j):
p, q = self.find(i), self.find(j)
if self.sz[p]>self.sz[q]:
self.dic[q] = p
self.sz[p] += self.sz[q]
else:
self.dic[p] = q
self.sz[q] += self.sz[p]
email_lists = [['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]'], ['[email protected]']]
so = Solution()
ans = so.combineEmails(email_lists)
print(ans)
[['[email protected]'], ['[email protected]', '[email protected]', '[email protected]']]