From 2c9d5c80edda1688b10cba1db926841a81ec2135 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Thu, 6 Mar 2025 11:23:18 +0100 Subject: [PATCH] Search: support for quoted values also adds tests and factors out the query parsing --- tags/search.py | 46 +++++++++++++++++++++++++++++++++++++++------- tags/tests.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/tags/search.py b/tags/search.py index 38876f5..143c33b 100644 --- a/tags/search.py +++ b/tags/search.py @@ -6,12 +6,16 @@ least it means we have all of this together in a separate file this way. import re from django.db.models import Q, Subquery +from collections import namedtuple from bugsink.moreiterutils import tuplewise from .models import TagValue, IssueTag, EventTag +ParsedQuery = namedtuple("ParsedQ", ["tags", "plain_text"]) + + def _remove_slices(s, slices_to_remove): """Returns s with the slices removed.""" items = [item for tup in slices_to_remove for item in tup] @@ -31,17 +35,45 @@ def _and_join(q_objects): return result +def parse_query(q): + # The simplest possible query-language that could have any value: key:value is recognized as such; the rest is "free + # text"; values containing spaces can be quoted, as in key:"quoted value". + tags = {} + + slices_to_remove = [] + + # first, match all key:value pairs with unquoted values + for match in re.finditer(r'(\S+):([^\s"]+)', q): + slices_to_remove.append(match.span()) + key, value = match.groups() + tags[key] = value + + # then, match all key:"quoted value" pairs + for match in re.finditer(r'(\S+):"([^"]+)"', q): + slices_to_remove.append(match.span()) + key, value = match.groups() + tags[key] = value + + slices_to_remove.sort(key=lambda tup: tup[0]) # _remove_slices expects the slices to be sorted + + # this is really TSTTCPW (or more like a "fake it till you make it" thing); but I'd rather "have something" and then + # have really-good-search than to have either nothing at all, or half-baked search. 
Note that we didn't even bother + # to set indexes on the fields we search on (nor create a single searchable field for the whole of 'title'). + plain_text_q = _remove_slices(q, slices_to_remove).strip() + + return ParsedQuery(tags, plain_text_q) + + def _search(TagClz, fk_fieldname, project, obj_list, q): if not q: return obj_list + parsed = parse_query(q) + # The simplest possible query-language that could have any value: key:value is recognized as such; the rest is "free # text"; no support for quoting of spaces. - slices_to_remove = [] clauses = [] - for match in re.finditer(r"(\S+):(\S+)", q): - slices_to_remove.append(match.span()) - key, value = match.groups() + for key, value in parsed.tags.items(): try: tag_value_obj = TagValue.objects.get(project=project, key__key=key, value=value) except TagValue.DoesNotExist: @@ -58,9 +90,9 @@ def _search(TagClz, fk_fieldname, project, obj_list, q): # this is really TSTTCPW (or more like a "fake it till you make it" thing); but I'd rather "have something" and then # have really-good-search than to have either nothing at all, or half-baked search. Note that we didn't even bother # to set indexes on the fields we search on (nor create a single searchable field for the whole of 'title'). 
- plain_text_q = _remove_slices(q, slices_to_remove).strip() - if plain_text_q: - clauses.append(Q(Q(calculated_type__icontains=plain_text_q) | Q(calculated_value__icontains=plain_text_q))) + if parsed.plain_text: + clauses.append( + Q(Q(calculated_type__icontains=parsed.plain_text) | Q(calculated_value__icontains=parsed.plain_text))) # if we reach this point, there's always either a plain_text_q or some key/value pair (this is a condition for # _and_join) diff --git a/tags/tests.py b/tags/tests.py index 9c4c2ae..0714406 100644 --- a/tags/tests.py +++ b/tags/tests.py @@ -9,7 +9,7 @@ from events.models import Event from .models import store_tags from .utils import deduce_tags -from .search import search_events, search_issues +from .search import search_events, search_issues, parse_query class DeduceTagsTestCase(RegularTestCase): @@ -101,6 +101,54 @@ class StoreTagsTestCase(DjangoTestCase): self.assertEqual(self.issue.tags.first().value.key.key, "foo") +class SearchParserTestCase(RegularTestCase): + + def test_parser(self): + # we don't actually do the below, empty queries are never parsed + # self.assertEquals(({}, ""), parse_query("")) + + self.assertEquals(({}, "FindableException"), parse_query("FindableException")) + self.assertEquals(({}, "findable value"), parse_query("findable value")) + + self.assertEquals(({"key": "value"}, ""), parse_query("key:value")) + self.assertEquals( + ({"key": "value", "anotherkey": "anothervalue"}, ""), + parse_query("key:value anotherkey:anothervalue")) + + self.assertEquals( + ({"keys.may.have.dots": "values.may.have.dots.too"}, ""), + parse_query("keys.may.have.dots:values.may.have.dots.too")) + + self.assertEquals( + ({"key": "value"}, "some text goes here"), + parse_query("key:value some text goes here")) + + self.assertEquals( + ({}, "text with spaces everywhere"), + parse_query("text with spaces everywhere")) + + self.assertEquals( + ({}, "key: preceded by space"), + parse_query("key: preceded by space")) + + 
self.assertEquals( + ({"key": "quoted value"}, ""), + parse_query('key:"quoted value"')) + + self.assertEquals( + ({"key": "quoted value"}, "and further text"), + parse_query('key:"quoted value" and further text')) + + # This is the kind of test that just documents "what is" rather than "what I believe is right". The weirdness + # here is mostly the double space "on both" which is the result of just cutting out the key:value bits. But... + # I'm not invested in getting this more precise (yet), because this whole case is a bit weird. I'd much rather + # point people in the direction of "put k:v at the beginning, and any free text at the end" (which is something + # we could even validate on at some later point). + self.assertEquals( + ({"key": "value"}, "text on both sides"), + parse_query("text on key:value both sides")) + + class SearchTestCase(DjangoTestCase): """'Integration'-test; assuming Tags are stored correctly in the DB, can we search for them?"""