mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
event.schema.json: put back in code, make test fail on invalid samples, and related fixes
This reverts course on 4201fbd778, and restores event.schema.json from that
commit. In that commit we said: 'this is not used'. Not true: it's used in a
test, though this test used the validity check to silently skip invalid samples.
In this commit:
1. Do _not_ just silently skip invalid samples. Since we have a way of properly
validating, let's use that so that we know how useful the samples that we have
actually are.
2. Deal with "_meta", a field that we sometimes see in the "private samples" (data
that ultimately comes from running a somewhat recent python-sdk against my
actual codebase). The need for this was exposed by [1]
3. Add a test for the up-to-date-ness of event.schema.json
4. Remove special-cased attribute checks in `is_valid`. `send_json` was, at the
time, an opportunistic way to just get my hands on some sample data. The
approach to validation reflected that: I just did some tests on the existence
of certain attributes to determine which JSON files were even events. But in
the end I did a full validation using an API schema, which made those
individual attribute checks redundant. This commit removes them.
This commit is contained in:
10
api/LICENSE
Normal file
10
api/LICENSE
Normal file
@@ -0,0 +1,10 @@
|
||||
This licence applies to the file: event.schema.json
|
||||
The source of this file is: https://raw.githubusercontent.com/getsentry/sentry-data-schemas/main/LICENSE
|
||||
|
||||
Copyright (c) 2020 Sentry (https://sentry.io) and individual contributors.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
3773
api/event.schema.json
Normal file
3773
api/event.schema.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,46 +30,21 @@ class Command(BaseCommand):
|
||||
parser.add_argument("identifiers", nargs="+")
|
||||
|
||||
def is_valid(self, data, identifier):
|
||||
if "event_id" not in data:
|
||||
self.stderr.write("%s %s" % ("Probably not a (single) event", identifier))
|
||||
return False
|
||||
|
||||
if "platform" not in data:
|
||||
# in a few cases this value isn't set either in the sentry test data but I'd rather ignore those...
|
||||
# because 'platform' is such a valuable piece of info while getting a sense of the shape of the data
|
||||
self.stderr.write("%s %s" % ("Platform not set", identifier))
|
||||
return False
|
||||
|
||||
if data.get("type", "") == "transaction":
|
||||
# kinda weird that this is in the "type" field rather than endpoint/envelope but who cares, that's
|
||||
# where the info lives and we use it as an indicator to skip
|
||||
self.stderr.write("%s %s" % ("We don't do transactions", identifier))
|
||||
return False
|
||||
|
||||
if data.get('profile'):
|
||||
# yet another case of undocumented behavior that I don't care about
|
||||
# ../sentry-current/static/app/utils/profiling/profile/formats/node/trace.json
|
||||
self.stderr.write("%s %s" % ("124", identifier))
|
||||
return False
|
||||
|
||||
if data.get('message'):
|
||||
# yet another case of undocumented behavior that I don't care about (top-level "message")
|
||||
# ../glitchtip/events/test_data/py_hi_event.json
|
||||
self.stderr.write("%s %s" % ("asdf", identifier))
|
||||
return False
|
||||
# In our (private) samples we often have this "_meta" field. I can't (quickly) find any documentation for it,
|
||||
# nor do I have any use for it myself (i.e. I don't display this info in templates). The quickest way to get
|
||||
# something to work is to just remove the info from the json. This comes with the drawback of changing data
|
||||
# on-validation, but for now that's an OK trade-off.
|
||||
if "_meta" in data:
|
||||
del data["_meta"]
|
||||
|
||||
try:
|
||||
schema_filename = settings.BASE_DIR / 'api/event.schema.json'
|
||||
if not schema_filename.exists():
|
||||
# see api/README.md for more info
|
||||
self.stderr.write("%s %s" % ("No schema file, exiting", identifier))
|
||||
exit()
|
||||
|
||||
with open(schema_filename, 'r') as f:
|
||||
schema = json.loads(f.read())
|
||||
|
||||
jsonschema.validate(data, schema)
|
||||
except jsonschema.ValidationError as e:
|
||||
self.stderr.write("%s %s %s" % ("still not ok at", repr(e), identifier))
|
||||
self.stderr.write("%s %s" % (repr(e), identifier))
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import requests
|
||||
import os
|
||||
import inspect
|
||||
import uuid
|
||||
@@ -12,6 +13,7 @@ from datetime import datetime, timezone
|
||||
from django.test import TestCase as DjangoTestCase, TransactionTestCase
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import tag
|
||||
from django.conf import settings
|
||||
|
||||
from projects.models import Project, ProjectMembership
|
||||
from releases.models import create_release_if_needed
|
||||
@@ -457,6 +459,20 @@ class IntegrationTest(TransactionTestCase):
|
||||
if self.verbosity > 1:
|
||||
print(f"Found {len(event_samples)} event samples and {len(event_samples_private)} private event samples")
|
||||
|
||||
try:
|
||||
github_result = requests.get(
|
||||
"https://raw.githubusercontent.com/getsentry/sentry-data-schemas/main/relay/event.schema.json")
|
||||
github_result.raise_for_status()
|
||||
|
||||
with open(settings.BASE_DIR / "api/event.schema.json", "r") as f:
|
||||
my_contents = f.read()
|
||||
|
||||
self.assertEqual(my_contents, github_result.content.decode("utf-8"), "event.schema.json is not up-to-date")
|
||||
except requests.RequestException:
|
||||
# getting the latest schema "once in a while" is nice so that we can be sure we're not falling behind;
|
||||
# but we don't want that to introduce a point-of-failure in our tests. So print-and-continue.
|
||||
print("Could not fetch the latest event schema from GitHub; I will not fail the tests for this")
|
||||
|
||||
for filename in event_samples + event_samples_private:
|
||||
with open(filename) as f:
|
||||
data = json.loads(f.read())
|
||||
@@ -468,7 +484,7 @@ class IntegrationTest(TransactionTestCase):
|
||||
data["timestamp"] = time.time()
|
||||
|
||||
if not command.is_valid(data, filename):
|
||||
continue
|
||||
raise Exception("validatity check in %s: %s" % filename, command.stderr.getvalue())
|
||||
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/store/",
|
||||
@@ -480,7 +496,8 @@ class IntegrationTest(TransactionTestCase):
|
||||
},
|
||||
)
|
||||
self.assertEqual(
|
||||
200, response.status_code, response.content if response.status_code != 302 else response.url)
|
||||
200, response.status_code, "Error in %s: %s" % (
|
||||
filename, response.content if response.status_code != 302 else response.url))
|
||||
|
||||
for event in Event.objects.all():
|
||||
urls = [
|
||||
|
||||
Reference in New Issue
Block a user