import datetime
import re
import time

import unidecode
from django.db.models import Q
from django.db.models.signals import post_save, pre_delete

import peeringdb_server.rest
from peeringdb_server.models import UTC, Facility, InternetExchange, Network


def unaccent(v):
    return unidecode.unidecode(v).lower()


# The search index is stored here

SEARCH_CACHE = {"search_index": {}, "time": 0}
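
# Cache layout (documenting what the code below stores here):
#   "search_index": {ref tag: {object id: model instance}}
#   "time":         unix timestamp of the last full index build
#   "update_t":     unix timestamp of the last incremental refresh (set in search())
#   "tag_id_re":    compiled "<tag><id>" pattern used for typed queries (set in search())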

# We want to hook searchable objects into save and delete signals so we can
# update the search index as the data changes, without having to reload the
# entire thing all the time.


def hook_save(sender, **kwargs):
    obj = kwargs.get("instance")
    tag = obj._handleref.tag
    idx = SEARCH_CACHE.get("search_index")
    if obj.status == "ok":
        if tag not in idx:
            idx[tag] = {}
        idx[tag][obj.id] = obj
        # print("%d %s refreshed in search index" % (obj.id, tag))
    else:
        try:
            del idx[tag][obj.id]
        except KeyError:
            pass
        # print("%d %s deleted from search index" % (obj.id, tag))


def hook_delete(sender, **kwargs):
    obj = kwargs.get("instance")
    tag = obj._handleref.tag
    try:
        del SEARCH_CACHE["search_index"][tag][obj.id]
    except KeyError:
        pass
    # print("%d %s deleted from search index" % (obj.id, tag))


searchable_models = [InternetExchange, Network, Facility]

for model in searchable_models:
    post_save.connect(hook_save, sender=model)
    pre_delete.connect(hook_delete, sender=model)
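
# Note (documenting the attribute contract implied by the code in this module):
# models registered here are expected to expose a HandleRef tag via
# `_handleref.tag`, a `status` field and `created` / `updated` timestamps, and
# for search results a `name`, `search_result_name` and `org_id`; `name_long`,
# `aka` and `asn` are used when present.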


def search(term):
    """
    Search searchable objects (ixp, network, facility ...) by term.

    Returns a dict of result lists keyed by ref tag.
    """

    search_tags = ("fac", "ix", "net")
    ref_dict = peeringdb_server.rest.ref_dict()
    t = time.time()

    if not SEARCH_CACHE.get("search_index"):

        # caching the whole db takes roughly 5 seconds, too slow to do
        # inline here
        search_index = {
            tag: {obj.id: obj
                  for obj in model.objects.filter(status__in=["ok"])}
            for tag, model in ref_dict.items() if tag in search_tags
        }

        for typ, stor in search_index.items():
            print("CACHED: %d items in %s" % (len(stor), typ))

        tag_id_re = re.compile("(" + "|".join(search_tags) + r"|asn|as)(\d+)")
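        # Illustrative examples of what this pattern matches (values made up):
        #   "ix42"    -> groups ("ix", "42")
        #   "as63311" -> groups ("as", "63311")
        # i.e. a ref tag or as/asn prefix followed directly by a numeric id.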

        # FIXME: for now let's force a flush every 120 seconds, might want to
        # look at an event based update solution instead
        SEARCH_CACHE.update(search_index=search_index, time=t, update_t=t,
                            tag_id_re=tag_id_re)
    else:
        search_index = SEARCH_CACHE.get("search_index")
        tag_id_re = SEARCH_CACHE.get("tag_id_re")

    # While we are using signals to make sure that the search index gets
    # updated whenever a model is saved, right now we still have updates from
    # external sources to which those signals cannot be easily connected
    # (importer, fac_merge command etc.)
    #
    # In order to reflect search index changes made by external sources we
    # need to find new / updated objects regularly and update the search
    # index from that.
    #
    # FIXME: this can be taken out when we turn the importer off - or just
    # leave it in as a fail-safe as it is fairly unobtrusive
    ut = SEARCH_CACHE.get("update_t", 0)
    if t - ut > 600:
        dut = datetime.datetime.fromtimestamp(ut).replace(tzinfo=UTC())
        print("Updating search index with newly created/updated objects")
        search_index_update = {
            tag: {
                obj.id: obj
                for obj in model.objects.filter(
                    Q(created__gte=dut) | Q(updated__gte=dut)).filter(
                        status="ok")
            }
            for tag, model in ref_dict.items() if tag in search_tags
        }
        for tag, objects in search_index_update.items():
            if tag not in SEARCH_CACHE["search_index"]:
                SEARCH_CACHE["search_index"][tag] = {
                    obj.id: obj
                    for obj in ref_dict[tag].objects.filter(status="ok")
                }
            SEARCH_CACHE["search_index"][tag].update(objects)

        SEARCH_CACHE["update_t"] = t

    # FIXME: for some reason this gets unset sometimes - need to figure out
    # why - for now just recreate it when it's missing
    if not tag_id_re:
        tag_id_re = re.compile("(" + "|".join(search_tags) + r"|asn|as)(\d+)")
        SEARCH_CACHE["tag_id_re"] = tag_id_re

    print("Search index retrieval took %.5f seconds" % (time.time() - t))

    result = {tag: [] for tag, model in ref_dict.items()}

    term = unaccent(term)

    # try to convert to int for numeric search matching
    typed_q = {}
    try:
        typed_q['int'] = int(term)
    except ValueError:
        pass

    # check for ref tags
    match = tag_id_re.match(term)
    if match:
        typed_q[match.group(1)] = match.group(2)
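    # Illustrative example (values made up): for the term "net20", typed_q
    # ends up as {"net": "20"}, which the loop below uses to prefix-match
    # object ids; for "as6939" it ends up as {"as": "6939"} for asn matching.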

    # FIXME: models should have a search_fields attr on them; this whole
    # thing should be replaced with something more modular to get rid of
    # all the ifs
    for tag, index in search_index.items():
        for id, data in index.items():
            if unaccent(data.name).find(term) > -1:
                result[tag].append({
                    "id": id,
                    "name": data.search_result_name,
                    "org_id": data.org_id
                })
                continue

            if hasattr(data, 'name_long') and unaccent(
                    data.name_long).find(term) > -1:
                result[tag].append({
                    "id": id,
                    "name": data.search_result_name,
                    "org_id": data.org_id
                })
                continue

            if hasattr(data, 'aka') and unaccent(data.aka).find(term) > -1:
                result[tag].append({
                    "id": id,
                    "name": data.search_result_name,
                    "org_id": data.org_id
                })
                continue

            if typed_q:
                if tag in typed_q:
                    if str(data.id).startswith(typed_q[tag]):
                        result[tag].append({
                            "id": id,
                            "name": data.search_result_name,
                            "org_id": data.org_id
                        })
                        continue

                # search asn on everything? probably just if asn is in the
                # search fields
                if hasattr(data, 'asn'):
                    asn = typed_q.get(
                        'as', typed_q.get('asn', str(typed_q.get('int', ''))))
                    if asn and str(data.asn).startswith(asn):
                        result[tag].append({
                            "id": id,
                            "name": data.search_result_name,
                            "org_id": data.org_id
                        })

    for k, items in result.items():
        result[k] = sorted(items, key=lambda row: row.get("name"))

    return result
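

# Illustrative usage (a sketch, not part of the original module; names and
# values below are made up). A caller such as a view or API endpoint would
# pass the raw query string and receive per-tag result lists:
#
#     results = search("example ix")
#     # results == {
#     #     "ix":  [{"id": 1, "name": "Example IX", "org_id": 10}],
#     #     "net": [],
#     #     "fac": [],
#     #     ...
#     # }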