Search: support for quoted values

also adds tests and factors out the query parsing
2026-03-10 08:01:17 +00:00 · 2025-03-06 11:23:18 +01:00
parent 1fa7436b2d
commit 2c9d5c80ed
2 changed files with 88 additions and 8 deletions
--- a/tags/search.py
+++ b/tags/search.py
@@ -6,12 +6,16 @@ least it means we have all of this together in a separate file this way.

 import re
 from django.db.models import Q, Subquery
+from collections import namedtuple

 from bugsink.moreiterutils import tuplewise

 from .models import TagValue, IssueTag, EventTag


+ParsedQuery = namedtuple("ParsedQuery", ["tags", "plain_text"])  # parse_query's result: {key: value}, free text
+
+
 def _remove_slices(s, slices_to_remove):
    """Returns s with the slices removed."""
    items = [item for tup in slices_to_remove for item in tup]
@@ -31,17 +35,45 @@ def _and_join(q_objects):
    return result


+def parse_query(q):
+    """Split the search string `q` into key:value tags and the remaining free text.
+
+    Recognized are key:value and key:"quoted value" (the quotes allow for whitespace in the value); whatever is left
+    after cutting those out is the free text. Returns a ParsedQuery(tags, plain_text); tags maps key to value (quotes
+    stripped), plain_text is the stripped remainder. For a repeated key, a quoted value beats an unquoted one;
+    otherwise the last occurrence wins.
+    """
+    unquoted_tags = {}
+    quoted_tags = {}
+    slices_to_remove = []
+
+    # A single left-to-right pass with both forms as alternatives: finditer's matches never overlap and come out in
+    # order of position, which is what _remove_slices expects. (With a separate pass per form, something shaped like
+    # key:value inside a quoted value, e.g. key:"a b:c", is matched twice: a bogus extra tag and overlapping slices.)
+    for match in re.finditer(r'(\S+):(?:"([^"]+)"|([^\s"]+))', q):
+        slices_to_remove.append(match.span())
+        key, quoted_value, unquoted_value = match.groups()
+        if quoted_value is not None:
+            quoted_tags[key] = quoted_value
+        else:
+            unquoted_tags[key] = unquoted_value
+
+    # the matched pairs are simply cut out; no attempt is made to normalize the whitespace that was around them.
+    plain_text_q = _remove_slices(q, slices_to_remove).strip()
+
+    return ParsedQuery({**unquoted_tags, **quoted_tags}, plain_text_q)
+
+
 def _search(TagClz, fk_fieldname, project, obj_list, q):
    if not q:
        return obj_list

+    parsed = parse_query(q)
+
    # The simplest possible query-language that could have any value: key:value is recognized as such; the rest is "free
    # text"; no support for quoting of spaces.
-    slices_to_remove = []
    clauses = []
-    for match in re.finditer(r"(\S+):(\S+)", q):
-        slices_to_remove.append(match.span())
-        key, value = match.groups()
+    for key, value in parsed.tags.items():
        try:
            tag_value_obj = TagValue.objects.get(project=project, key__key=key, value=value)
        except TagValue.DoesNotExist:
@@ -58,9 +90,9 @@ def _search(TagClz, fk_fieldname, project, obj_list, q):
    # this is really TSTTCPW (or more like a "fake it till you make it" thing); but I'd rather "have something" and then
    # have really-good-search than to have either nothing at all, or half-baked search. Note that we didn't even bother
    # to set indexes on the fields we search on (nor create a single searchable field for the whole of 'title').
-    plain_text_q = _remove_slices(q, slices_to_remove).strip()
-    if plain_text_q:
-        clauses.append(Q(Q(calculated_type__icontains=plain_text_q) | Q(calculated_value__icontains=plain_text_q)))
+    if parsed.plain_text:
+        clauses.append(
+            Q(Q(calculated_type__icontains=parsed.plain_text) | Q(calculated_value__icontains=parsed.plain_text)))

    # if we reach this point, there's always either a plain_text_q or some key/value pair (this is a condition for
    # _and_join)
--- a/tags/tests.py
+++ b/tags/tests.py
@@ -9,7 +9,7 @@ from events.models import Event

 from .models import store_tags
 from .utils import deduce_tags
-from .search import search_events, search_issues
+from .search import search_events, search_issues, parse_query


 class DeduceTagsTestCase(RegularTestCase):
@@ -101,6 +101,54 @@ class StoreTagsTestCase(DjangoTestCase):
        self.assertEqual(self.issue.tags.first().value.key.key, "foo")


+class SearchParserTestCase(RegularTestCase):
+    """Tests for parse_query: which parts of a query become tags, and which remain free text."""
+
+    def test_parser(self):
+        # parse_query("") is deliberately not covered: empty queries are never parsed (callers return early on those)
+
+        self.assertEqual(({}, "FindableException"), parse_query("FindableException"))
+        self.assertEqual(({}, "findable value"), parse_query("findable value"))
+
+        self.assertEqual(({"key": "value"}, ""), parse_query("key:value"))
+        self.assertEqual(
+            ({"key": "value", "anotherkey": "anothervalue"}, ""),
+            parse_query("key:value anotherkey:anothervalue"))
+
+        self.assertEqual(
+            ({"keys.may.have.dots": "values.may.have.dots.too"}, ""),
+            parse_query("keys.may.have.dots:values.may.have.dots.too"))
+
+        self.assertEqual(
+            ({"key": "value"}, "some text goes here"),
+            parse_query("key:value some text goes here"))
+
+        self.assertEqual(
+            ({}, "text  with  spaces  everywhere"),
+            parse_query("text  with  spaces  everywhere"))
+
+        self.assertEqual(
+            ({}, "key: preceded by space"),
+            parse_query("key: preceded by space"))
+
+        self.assertEqual(
+            ({"key": "quoted value"}, ""),
+            parse_query('key:"quoted value"'))
+
+        self.assertEqual(
+            ({"key": "quoted value"}, "and further text"),
+            parse_query('key:"quoted value" and further text'))
+
+        # This is the kind of test that just documents "what is" rather than "what I believe is right". The weirdness
+        # here is mostly the double space "on  both" which is the result of just cutting out the key:value bits. But...
+        # I'm not invested in getting this more precise (yet), because this whole case is a bit weird. I'd much rather
+        # point people in the direction of "put k:v at the beginning, and any free text at the end" (which is something
+        # we could even validate on at some later point).
+        self.assertEqual(
+            ({"key": "value"}, "text on  both sides"),
+            parse_query("text on key:value both sides"))
+
+
 class SearchTestCase(DjangoTestCase):
    """'Integration'-test; assuming Tags are stored correctly in the DB, can we search for them?"""