4 Group emails

Given 1 million email list: list 1: [email protected], [email protected] list 2: [email protected], [email protected] list 3: [email protected] list 4: [email protected] ... Combine lists with identical emails, and output tuples: (list 1, list 2, list 4) ([email protected], [email protected], [email protected]) (list 3) ([email protected]) 如何用并查集解决

思路:直接union find扫一遍。把所有email都带一个parent。然后再扫一遍email。根据他们的parent来cosntruct一个结果就行了。parent想同的email就是在一个group的

import collections
class Solution(object):

    def combineEmails(self, email_lists):

        dic = {}
        u = Union()
        for l in email_lists:
            for e in l:
                if e not in u.dic:
                    u.add(e)
                u.unite(e, l[0])

        # construct output based on parents
        tmp = collections.defaultdict(list)
        for k in u.dic:
            tmp[u.find(k)].append(k)

        res = [tmp[i] for i in tmp]

        return res


class Union(object):
    def __init__(self):
        self.dic = {}
        self.sz = {}

    def find(self, i):
        if self.dic[i]!=self.dic[self.dic[i]]:
            self.dic[i]= self.dic[self.dic[i]]
        return self.dic[i]

    def add(self, i):
        self.dic[i] = i
        self.sz[i] = 1

    def unite(self, i, j):
        if self.find(i)!=self.find(j):
            p, q = self.find(i), self.find(j)

            if self.sz[p]>self.sz[q]:
                self.dic[q] = p
                self.sz[p] += self.sz[q]
            else:
                self.dic[p] = q
                self.sz[q] += self.sz[p]


email_lists = [['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]'], ['[email protected]']]
so = Solution()
ans = so.combineEmails(email_lists)
print(ans)
[['[email protected]'], ['[email protected]', '[email protected]', '[email protected]']]

results matching ""

    No results matching ""