4 Group emails
Given 1 million email list: list 1: a@a.com, b@b.com list 2: b@b.com, c@c.com list 3: e@e.com list 4: a@a.com ... Combine lists with identical emails, and output tuples: (list 1, list 2, list 4) (a@a.com, b@b.com, c@c.com) (list 3) (e@e.com) 如何用并查集解决
思路:直接union find扫一遍。把所有email都带一个parent。然后再扫一遍email。根据他们的parent来cosntruct一个结果就行了。parent想同的email就是在一个group的
import collections
class Solution(object):
def combineEmails(self, email_lists):
dic = {}
u = Union()
for l in email_lists:
for e in l:
if e not in u.dic:
u.add(e)
u.unite(e, l[0])
# construct output based on parents
tmp = collections.defaultdict(list)
for k in u.dic:
tmp[u.find(k)].append(k)
res = [tmp[i] for i in tmp]
return res
class Union(object):
def __init__(self):
self.dic = {}
self.sz = {}
def find(self, i):
if self.dic[i]!=self.dic[self.dic[i]]:
self.dic[i]= self.dic[self.dic[i]]
return self.dic[i]
def add(self, i):
self.dic[i] = i
self.sz[i] = 1
def unite(self, i, j):
if self.find(i)!=self.find(j):
p, q = self.find(i), self.find(j)
if self.sz[p]>self.sz[q]:
self.dic[q] = p
self.sz[p] += self.sz[q]
else:
self.dic[p] = q
self.sz[q] += self.sz[p]
email_lists = [['a@a.com', 'b@b.com'], ['b@b.com', 'c@c.com'], ['e@e.com'], ['a@a.com']]
so = Solution()
ans = so.combineEmails(email_lists)
print(ans)
[['e@e.com'], ['c@c.com', 'a@a.com', 'b@b.com']]