From 7b1861a58da2b7318e43876f9721d6830cc8b0f2 Mon Sep 17 00:00:00 2001 From: Stefan Pratter Date: Thu, 2 May 2019 15:20:20 +0000 Subject: [PATCH] un-accent search on quick search and api filters (#310) --- Pipfile | 8 +---- .../management/commands/pdb_api_test.py | 29 ++++++++++++++++ peeringdb_server/rest.py | 5 +++ peeringdb_server/search.py | 12 ++++--- requirements.txt | 1 + tests/test_search.py | 33 +++++++++++++++++-- 6 files changed, 74 insertions(+), 14 deletions(-) diff --git a/Pipfile b/Pipfile index 968e0d44..0decc316 100644 --- a/Pipfile +++ b/Pipfile @@ -1,12 +1,9 @@ [[source]] - name = "pypi" url = "https://pypi.org/simple" verify_ssl = true - [dev-packages] - pytest = ">=2.8.7" pytest-cov = ">=2.0.0" pytest-django = ">=2.9.1" @@ -14,9 +11,7 @@ pytest-filedata = ">=0.1.0" jsonschema = ">=2.6.0" facsimile = ">=1.1.1" - [packages] - certifi = "==2017.11.5" ipaddress = "==1.0.19" mysqlclient = "==1.3.9" @@ -56,8 +51,7 @@ django = "==1.11.20" uwsgi = "==2.0.14" markdown = "==2.6.7" "twentyc.rpc" = "==0.3.5" - +unidecode = "==1.0.23" [requires] - python_version = "2.7" diff --git a/peeringdb_server/management/commands/pdb_api_test.py b/peeringdb_server/management/commands/pdb_api_test.py index 6b7fabf7..d372c3c6 100644 --- a/peeringdb_server/management/commands/pdb_api_test.py +++ b/peeringdb_server/management/commands/pdb_api_test.py @@ -1,4 +1,5 @@ #!/bin/env python +# -*- coding: utf-8 -*- """ series of integration/unit tests for the pdb api """ @@ -2113,6 +2114,34 @@ class TestJSON(unittest.TestCase): self.assertEqual(target, comp) + ########################################################################## + + def test_guest_005_list_filter_accented(self): + + """ + test filtering with accented search terms + """ + + #TODO: sqlite3 is being used as the testing backend, and django 1.11 + #seems to be unable to set a collation on it, so we can't properly test + #the other way atm, for now this test at least confirms that the term is + #unaccented correctly. + # + #on production we run mysql with flattened accents so both ways should work + #there regardless. + + org = Organization.objects.create(name="org unaccented", status="ok") + net = Network.objects.create(asn=12345, name=u"net unaccented", + status="ok", org=org) + ix = InternetExchange.objects.create(org=org, name=u"ix unaccented", status="ok") + fac = Facility.objects.create(org=org, name=u"fac unaccented", status="ok") + + for tag in ["org","net","ix","fac"]: + data = self.db_guest.all(tag, name=u"{} unãccented".format(tag)) + self.assertEqual(len(data), 1) + + + ########################################################################## # READONLY PERMISSION TESTS # These tests assert that the readonly users cannot write anything diff --git a/peeringdb_server/rest.py b/peeringdb_server/rest.py index 538a346d..6a1d6dcf 100644 --- a/peeringdb_server/rest.py +++ b/peeringdb_server/rest.py @@ -1,4 +1,7 @@ import importlib + +import unidecode + from rest_framework import (routers, serializers, status, viewsets) from rest_framework.response import Response from rest_framework.views import exception_handler @@ -286,6 +289,8 @@ class ModelViewSet(viewsets.ModelViewSet): filters = {} for k, v in self.request.query_params.items(): + v = unidecode.unidecode(v) + if k[-3:] == "_id" and k not in field_names: k = k[:-3] diff --git a/peeringdb_server/search.py b/peeringdb_server/search.py index 00c192d1..b43d4708 100644 --- a/peeringdb_server/search.py +++ b/peeringdb_server/search.py @@ -5,6 +5,10 @@ from peeringdb_server.models import (UTC, InternetExchange, Network, Facility) import re import time import datetime +import unidecode + +def unaccent(v): + return unidecode.unidecode(v).lower() # SEARCH INDEX BE STORED HERE @@ -130,7 +134,7 @@ def search(term): result = {tag: [] for tag, model in ref_dict.items()} - term = term.lower() + term = unaccent(term) # try to convert to int for numeric search matching typed_q = {} @@ -153,7 +157,7 @@ def search(term): # rid of all the ifs for tag, index in search_index.items(): for id, data in index.items(): - if data.name.lower().find(term) > -1: + if unaccent(data.name).find(term) > -1: result[tag].append({ "id": id, "name": data.search_result_name, @@ -162,7 +166,7 @@ def search(term): continue if hasattr(data, - 'name_long') and data.name_long.lower().find(term) > -1: + 'name_long') and unaccent(data.name_long).find(term) > -1: result[tag].append({ "id": id, "name": data.search_result_name, @@ -170,7 +174,7 @@ def search(term): }) continue - if hasattr(data, 'aka') and data.aka.lower().find(term) > -1: + if hasattr(data, 'aka') and unaccent(data.aka).find(term) > -1: result[tag].append({ "id": id, "name": data.search_result_name, diff --git a/requirements.txt b/requirements.txt index d6fa4186..1a1ff469 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ Markdown==2.6.7 bleach==2.1.3 coreapi==2.3.1 googlemaps==2.5.1 +Unidecode==1.0.23 django-allauth==0.32.0 django-autocomplete-light==3.2.9 diff --git a/tests/test_search.py b/tests/test_search.py index 5685cd0c..9ee3bbd4 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Unit-tests for quick search functionality - note that advanced search is not tested here as that is using the PDB API entirely. @@ -6,6 +7,7 @@ import re import datetime import pytest +import unidecode from django.test import TestCase, RequestFactory @@ -27,20 +29,26 @@ class SearchTests(TestCase): search.SEARCH_CACHE["search_index"] = {} cls.instances = {} + cls.instances_accented = {} cls.instances_sponsored = {} # create an instance of each searchable model, so we have something # to search for cls.org = models.Organization.objects.create(name="Test org") for model in search.searchable_models: + kwargs = {} if model.handleref.tag == "net": kwargs = {"asn": 1} - else: - kwargs = {} cls.instances[model.handleref.tag] = model.objects.create( status="ok", org=cls.org, name="Test %s" % model.handleref.tag, **kwargs) + if model.handleref.tag == "net": + kwargs = {"asn": 2} + cls.instances_accented[model.handleref.tag] = model.objects.create( + status="ok", org=cls.org, + name=u"ãccented {}".format(model.handleref.tag), **kwargs) + # we also need to test that sponsor ship status comes through # accordingly cls.org_w_sponsorship = models.Organization.objects.create(name="Sponsor org", status="ok") @@ -52,7 +60,7 @@ class SearchTests(TestCase): for model in search.searchable_models: if model.handleref.tag == "net": - kwargs = {"asn": 2} + kwargs = {"asn": 3} else: kwargs = {} cls.instances_sponsored[model.handleref.tag] = model.objects.create( @@ -140,3 +148,22 @@ class SearchTests(TestCase): new_ix_p.delete() new_ix_o.delete() self.test_search() + + def test_search_unaccent(self): + """ + search for entities containing 'ãccented' using accented and unaccented + terms + """ + rv = search.search(u"accented") + for k, inst in self.instances_accented.items(): + assert k in rv + assert len(rv[k]) == 1 + assert unidecode.unidecode(rv[k][0]["name"]) == unidecode.unidecode(inst.search_result_name) + + rv = search.search(u"ãccented") + for k, inst in self.instances_accented.items(): + assert k in rv + assert len(rv[k]) == 1 + assert unidecode.unidecode(rv[k][0]["name"]) == unidecode.unidecode(inst.search_result_name) + +