from django.db.models.signals import post_save, pre_delete
from django.db.models import Q

import peeringdb_server.rest
from peeringdb_server.models import (UTC, InternetExchange, Network, Facility)

import re
import time
import datetime

# The in-memory search index is stored here
SEARCH_CACHE = {"search_index": {}, "time": 0}

# We want to hook searchable objects into the save and delete signals
# so we can update the search index as the data changes, without having
# to reload the entire thing all the time


def hook_save(sender, **kwargs):
    """Add or refresh a saved object in the search index."""
    obj = kwargs.get("instance")
    tag = obj._handleref.tag
    idx = SEARCH_CACHE.get("search_index")
    if obj.status == "ok":
        if tag not in idx:
            idx[tag] = {}
        idx[tag][obj.id] = obj
        # print("%d %s refreshed in search index" % (obj.id, tag))
    else:
        try:
            del idx[tag][obj.id]
        except KeyError:
            pass
        # print("%d %s deleted from search index" % (obj.id, tag))


def hook_delete(sender, **kwargs):
    """Remove a deleted object from the search index."""
    obj = kwargs.get("instance")
    tag = obj._handleref.tag
    try:
        del SEARCH_CACHE["search_index"][tag][obj.id]
    except KeyError:
        pass
    # print("%d %s deleted from search index" % (obj.id, tag))


searchable_models = [InternetExchange, Network, Facility]

for model in searchable_models:
    post_save.connect(hook_save, sender=model)
    pre_delete.connect(hook_delete, sender=model)


def search(term):
    """
    Search searchable objects (ixp, network, facility ...) by term

    Returns a result dict keyed by ref tag, e.g.
    {"net": [{"id": 1, "name": "..."}], "ix": [...], "fac": [...]}
    """

    search_tags = ('fac', 'ix', 'net')
    ref_dict = peeringdb_server.rest.ref_dict()
    t = time.time()

    if not SEARCH_CACHE.get("search_index"):

        # indexing the whole db takes roughly 5 seconds, too slow to do
        # inline on every request, so cache the result
        search_index = {
            tag: {obj.id: obj
                  for obj in model.objects.filter(status__in=["ok"])}
            for tag, model in ref_dict.items() if tag in search_tags
        }

        for typ, stor in search_index.items():
            print("CACHED: %d items in %s" % (len(stor), typ))

        tag_id_re = re.compile('(' + "|".join(search_tags) + r'|asn|as)(\d+)')

        # FIXME: for now let's force a flush every 120 seconds, might want to
        # look at an event-based update solution instead
        SEARCH_CACHE.update(search_index=search_index, time=t, update_t=t,
                            tag_id_re=tag_id_re)
    else:
        search_index = SEARCH_CACHE.get('search_index')
        tag_id_re = SEARCH_CACHE.get('tag_id_re')

        # While we are using signals to make sure that the search index gets
        # updated whenever a model is saved, right now we still have updates
        # from external sources to which those signals cannot easily be
        # connected (importer, fac_merge command etc.)
        #
        # In order to reflect search index changes made by external sources
        # we need to find new / updated objects regularly and update the
        # search index from them
        #
        # FIXME: this can be taken out when we turn the importer off - or
        # just leave it in as a fail-safe as it is fairly unobtrusive
        ut = SEARCH_CACHE.get("update_t", 0)
        if t - ut > 600:
            dut = datetime.datetime.fromtimestamp(ut).replace(tzinfo=UTC())
            print("Updating search index with newly created/updated objects")
            search_index_update = {
                tag: {
                    obj.id: obj
                    for obj in model.objects.filter(
                        Q(created__gte=dut) | Q(updated__gte=dut)).filter(
                            status="ok")
                }
                for tag, model in ref_dict.items() if tag in search_tags
            }
            for tag, objects in search_index_update.items():
                if tag not in SEARCH_CACHE["search_index"]:
                    SEARCH_CACHE["search_index"][tag] = {
                        obj.id: obj
                        for obj in ref_dict[tag].objects.filter(status="ok")
                    }
                SEARCH_CACHE["search_index"][tag].update(objects)

            SEARCH_CACHE["update_t"] = t

        # FIXME: for some reason this gets unset sometimes - need to figure
        # out why - for now just recreate it when it's missing
        if not tag_id_re:
            tag_id_re = re.compile(
                '(' + "|".join(search_tags) + r'|asn|as)(\d+)')
            SEARCH_CACHE['tag_id_re'] = tag_id_re

    print("Search index retrieval took %.5f seconds" % (time.time() - t))

    result = {tag: [] for tag, model in ref_dict.items()}

    term = term.lower()

    # try to convert to int for numeric search matching
    typed_q = {}
    try:
        typed_q['int'] = int(term)
    except ValueError:
        pass

    # check for ref tags (e.g. "as63311" or "net20" type queries)
    try:
        match = tag_id_re.match(term)
        if match:
            typed_q[match.group(1)] = match.group(2)
    except ValueError:
        pass

    # FIXME: the model should have a search_fields attr on it and this whole
    # thing should be replaced with something more modular to get rid of all
    # the ifs
    for tag, index in search_index.items():
        for id, data in index.items():

            if data.name.lower().find(term) > -1:
                result[tag].append({"id": id, "name": data.search_result_name})
                continue

            if hasattr(data, 'name_long') and \
                    data.name_long.lower().find(term) > -1:
                result[tag].append({"id": id, "name": data.search_result_name})
                continue

            if hasattr(data, 'aka') and data.aka.lower().find(term) > -1:
                result[tag].append({"id": id, "name": data.search_result_name})
                continue

            if typed_q:
                # the term matched the id pattern for this ref tag
                # (e.g. "net20"), so match against object ids
                if tag in typed_q:
                    if str(data.id).startswith(typed_q[tag]):
                        result[tag].append({
                            "id": id,
                            "name": data.search_result_name
                        })
                        continue

                # search asn on everything? probably just if asn is in
                # search fields
                if hasattr(data, 'asn'):
                    asn = typed_q.get(
                        'as', typed_q.get('asn', str(typed_q.get('int', ''))))
                    if asn and str(data.asn).startswith(asn):
                        result[tag].append({
                            "id": id,
                            "name": data.search_result_name
                        })

    for k, items in result.items():
        result[k] = sorted(items, key=lambda row: row.get("name"))

    return result
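

# --- Usage sketch (illustrative only, not part of the original module) ---
# A minimal example of how search() could be exposed as a JSON endpoint.
# The view name and any URL routing are hypothetical assumptions for
# demonstration; JsonResponse is standard Django (django.http).


def view_quick_search(request):
    """Hypothetical JSON endpoint wrapping search(); for illustration only."""
    from django.http import JsonResponse

    term = request.GET.get("q", "")
    if not term:
        return JsonResponse({"error": "no search term"}, status=400)

    # search() returns {tag: [{"id": ..., "name": ...}, ...]} keyed by ref tag
    return JsonResponse(search(term))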