mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
Search: support for quoted values
also adds tests and factors out the query parsing
This commit is contained in:
@@ -6,12 +6,16 @@ least it means we have all of this together in a separate file this way.
|
||||
|
||||
import re
|
||||
from django.db.models import Q, Subquery
|
||||
from collections import namedtuple
|
||||
|
||||
from bugsink.moreiterutils import tuplewise
|
||||
|
||||
from .models import TagValue, IssueTag, EventTag
|
||||
|
||||
|
||||
# Result of parsing a search query: `tags` is a dict mapping tag key -> tag value, `plain_text` is the remaining free
# text. The typename matches the name it is bound to, so that reprs read naturally and instances can be pickled.
ParsedQuery = namedtuple("ParsedQuery", ["tags", "plain_text"])
|
||||
|
||||
|
||||
def _remove_slices(s, slices_to_remove):
|
||||
"""Returns s with the slices removed."""
|
||||
items = [item for tup in slices_to_remove for item in tup]
|
||||
@@ -31,17 +35,45 @@ def _and_join(q_objects):
|
||||
return result
|
||||
|
||||
|
||||
# One tag token: `key:value` or `key:"quoted value"`. Group 1 is the key, group 2 the quoted value (quotes excluded),
# group 3 the unquoted value; exactly one of groups 2 and 3 is set on a match. Keys cannot contain whitespace or a
# double quote, so the opening quote of a quoted value is never swallowed by the key.
_TAG_TOKEN_RE = re.compile(r'([^\s"]+):(?:"([^"]+)"|([^\s"]+))')


def parse_query(q):
    """Split a search query into tag filters and free text.

    The simplest possible query-language that could have any value: `key:value` and `key:"quoted value"` are
    recognized as tags; everything else is free text. Quoting is the only way to have whitespace in a value; there
    is no escaping of quotes inside a quoted value.

    The query is tokenized in a single left-to-right pass, which means that the contents of a quoted value are
    consumed as part of that value and never re-interpreted as a tag of their own (e.g. `key:"foo bar:baz"` yields
    just the one tag `key`).

    Returns a ParsedQuery(tags, plain_text): `tags` maps each key to its value (when a key occurs more than once the
    last occurrence wins); `plain_text` is `q` with the tag tokens cut out, stripped of leading/trailing whitespace.
    Whitespace surrounding a cut-out token in the middle of the text is left as-is.
    """
    tags = {}
    slices_to_remove = []

    for match in _TAG_TOKEN_RE.finditer(q):
        key, quoted_value, bare_value = match.groups()
        tags[key] = bare_value if quoted_value is None else quoted_value
        slices_to_remove.append(match.span())

    # finditer yields non-overlapping matches from left to right, so the spans are already in the sorted order that
    # _remove_slices expects.
    plain_text_q = _remove_slices(q, slices_to_remove).strip()

    return ParsedQuery(tags, plain_text_q)
|
||||
|
||||
|
||||
def _search(TagClz, fk_fieldname, project, obj_list, q):
|
||||
if not q:
|
||||
return obj_list
|
||||
|
||||
parsed = parse_query(q)
|
||||
|
||||
# The simplest possible query-language that could have any value: key:value is recognized as such; the rest is "free
|
||||
# text"; no support for quoting of spaces.
|
||||
slices_to_remove = []
|
||||
clauses = []
|
||||
for match in re.finditer(r"(\S+):(\S+)", q):
|
||||
slices_to_remove.append(match.span())
|
||||
key, value = match.groups()
|
||||
for key, value in parsed.tags.items():
|
||||
try:
|
||||
tag_value_obj = TagValue.objects.get(project=project, key__key=key, value=value)
|
||||
except TagValue.DoesNotExist:
|
||||
@@ -58,9 +90,9 @@ def _search(TagClz, fk_fieldname, project, obj_list, q):
|
||||
# this is really TSTTCPW (or more like a "fake it till you make it" thing); but I'd rather "have something" and then
|
||||
# have really-good-search than to have either nothing at all, or half-baked search. Note that we didn't even bother
|
||||
# to set indexes on the fields we search on (nor create a single searchable field for the whole of 'title').
|
||||
plain_text_q = _remove_slices(q, slices_to_remove).strip()
|
||||
if plain_text_q:
|
||||
clauses.append(Q(Q(calculated_type__icontains=plain_text_q) | Q(calculated_value__icontains=plain_text_q)))
|
||||
if parsed.plain_text:
|
||||
clauses.append(
|
||||
Q(Q(calculated_type__icontains=parsed.plain_text) | Q(calculated_value__icontains=parsed.plain_text)))
|
||||
|
||||
# if we reach this point, there's always either a plain_text_q or some key/value pair (this is a condition for
|
||||
# _and_join)
|
||||
|
||||
@@ -9,7 +9,7 @@ from events.models import Event
|
||||
|
||||
from .models import store_tags
|
||||
from .utils import deduce_tags
|
||||
from .search import search_events, search_issues
|
||||
from .search import search_events, search_issues, parse_query
|
||||
|
||||
|
||||
class DeduceTagsTestCase(RegularTestCase):
|
||||
@@ -101,6 +101,54 @@ class StoreTagsTestCase(DjangoTestCase):
|
||||
self.assertEqual(self.issue.tags.first().value.key.key, "foo")
|
||||
|
||||
|
||||
class SearchParserTestCase(RegularTestCase):
    """Tests for parse_query: splitting a query string into tags (key:value pairs) and free text."""

    def test_parser(self):
        # we don't actually do the below, empty queries are never parsed
        # self.assertEqual(({}, ""), parse_query(""))

        # free text only
        self.assertEqual(({}, "FindableException"), parse_query("FindableException"))
        self.assertEqual(({}, "findable value"), parse_query("findable value"))

        # tags only
        self.assertEqual(({"key": "value"}, ""), parse_query("key:value"))
        self.assertEqual(
            ({"key": "value", "anotherkey": "anothervalue"}, ""),
            parse_query("key:value anotherkey:anothervalue"))

        self.assertEqual(
            ({"keys.may.have.dots": "values.may.have.dots.too"}, ""),
            parse_query("keys.may.have.dots:values.may.have.dots.too"))

        # tags followed by free text
        self.assertEqual(
            ({"key": "value"}, "some text goes here"),
            parse_query("key:value some text goes here"))

        self.assertEqual(
            ({}, "text with spaces everywhere"),
            parse_query("text with spaces everywhere"))

        # a colon directly followed by a space does not make a tag
        self.assertEqual(
            ({}, "key: preceded by space"),
            parse_query("key: preceded by space"))

        # quoted values may contain spaces
        self.assertEqual(
            ({"key": "quoted value"}, ""),
            parse_query('key:"quoted value"'))

        self.assertEqual(
            ({"key": "quoted value"}, "and further text"),
            parse_query('key:"quoted value" and further text'))

        # This is the kind of test that just documents "what is" rather than "what I believe is right". The weirdness
        # here is mostly the double space "on  both" which is the result of just cutting out the key:value bits. But...
        # I'm not invested in getting this more precise (yet), because this whole case is a bit weird. I'd much rather
        # point people in the direction of "put k:v at the beginning, and any free text at the end" (which is something
        # we could even validate on at some later point).
        self.assertEqual(
            ({"key": "value"}, "text on  both sides"),
            parse_query("text on key:value both sides"))
|
||||
|
||||
|
||||
class SearchTestCase(DjangoTestCase):
|
||||
"""'Integration'-test; assuming Tags are stored correctly in the DB, can we search for them?"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user