forked from GithubBackups/healthchecks
Move notification logic to hc.api.transports. Don't use "paused" state for checks.
This commit is contained in:
parent
7b475118f9
commit
21a042aa16
@ -182,7 +182,7 @@ class NotificationsAdmin(admin.ModelAdmin):
|
|||||||
search_fields = ["owner__name", "owner__code", "channel__value"]
|
search_fields = ["owner__name", "owner__code", "channel__value"]
|
||||||
list_select_related = ("owner", "channel")
|
list_select_related = ("owner", "channel")
|
||||||
list_display = ("id", "created", "check_status", "check_name",
|
list_display = ("id", "created", "check_status", "check_name",
|
||||||
"channel_kind", "channel_value", "status")
|
"channel_kind", "channel_value")
|
||||||
list_filter = ("created", "check_status", "channel__kind")
|
list_filter = ("created", "check_status", "channel__kind")
|
||||||
|
|
||||||
def check_name(self, obj):
|
def check_name(self, obj):
|
||||||
|
@ -39,27 +39,17 @@ class Command(BaseCommand):
|
|||||||
Return False if no checks need to be processed.
|
Return False if no checks need to be processed.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Save the new status. If sendalerts crashes,
|
||||||
|
# it won't process this check again.
|
||||||
check.status = check.get_status()
|
check.status = check.get_status()
|
||||||
|
check.save()
|
||||||
|
|
||||||
tmpl = "\nSending alert, status=%s, code=%s\n"
|
tmpl = "\nSending alert, status=%s, code=%s\n"
|
||||||
self.stdout.write(tmpl % (check.status, check.code))
|
self.stdout.write(tmpl % (check.status, check.code))
|
||||||
|
check.send_alert()
|
||||||
|
|
||||||
try:
|
connection.close()
|
||||||
check.send_alert()
|
|
||||||
except:
|
|
||||||
# Catch EVERYTHING. If we crash here, what can happen is:
|
|
||||||
# - the sendalerts command will crash
|
|
||||||
# - supervisor will respawn sendalerts command
|
|
||||||
# - sendalerts will try same thing again, resulting in
|
|
||||||
# infinite loop
|
|
||||||
# So instead we catch and log all exceptions, and mark
|
|
||||||
# the checks as paused so they are not retried.
|
|
||||||
logger.error("Could not alert %s" % check.code, exc_info=True)
|
|
||||||
check.status = "paused"
|
|
||||||
finally:
|
|
||||||
check.save()
|
|
||||||
connection.close()
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
24
hc/api/migrations/0022_auto_20160130_2042.py
Normal file
24
hc/api/migrations/0022_auto_20160130_2042.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.9 on 2016-01-30 20:42
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Drop ``Notification.status`` and add ``Notification.error``."""

    dependencies = [("api", "0021_ping_n")]

    operations = [
        # The "status" column is removed from the notification model ...
        migrations.RemoveField(model_name="notification", name="status"),
        # ... and a short, optional "error" text column is added.
        migrations.AddField(
            model_name="notification",
            name="error",
            field=models.CharField(blank=True, max_length=200),
        ),
    ]
|
126
hc/api/models.py
126
hc/api/models.py
@ -1,17 +1,15 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import timedelta as td
|
from datetime import timedelta as td
|
||||||
|
|
||||||
import requests
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.core.urlresolvers import reverse
|
from django.core.urlresolvers import reverse
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.template.loader import render_to_string
|
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from hc.api import transports
|
||||||
from hc.lib import emails
|
from hc.lib import emails
|
||||||
|
|
||||||
STATUSES = (
|
STATUSES = (
|
||||||
@ -125,103 +123,37 @@ class Channel(models.Model):
|
|||||||
verify_link = settings.SITE_ROOT + verify_link
|
verify_link = settings.SITE_ROOT + verify_link
|
||||||
emails.verify_email(self.value, {"verify_link": verify_link})
|
emails.verify_email(self.value, {"verify_link": verify_link})
|
||||||
|
|
||||||
|
@property
def transport(self):
    """Return the transport object matching this channel's ``kind``.

    Every transport is constructed with this channel as its only
    argument.

    Raises NotImplementedError when ``kind`` is not one of the known
    channel kinds.
    """
    if self.kind == "email":
        return transports.Email(self)
    elif self.kind == "webhook":
        return transports.Webhook(self)
    elif self.kind == "slack":
        return transports.Slack(self)
    elif self.kind == "hipchat":
        return transports.HipChat(self)
    elif self.kind == "pd":
        return transports.PagerDuty(self)
    elif self.kind == "po":
        # Pushover, like every other transport, needs the channel:
        # Transport.__init__ takes it as a required argument.
        return transports.Pushover(self)
    else:
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception class to raise here.
        raise NotImplementedError("Unknown channel kind: %s" % self.kind)
|
||||||
|
|
||||||
def notify(self, check):
|
def notify(self, check):
|
||||||
|
# Make 3 attempts--
|
||||||
|
for x in range(0, 3):
|
||||||
|
error = self.transport.notify(check) or ""
|
||||||
|
if error == "":
|
||||||
|
break # Success!
|
||||||
|
|
||||||
n = Notification(owner=check, channel=self)
|
n = Notification(owner=check, channel=self)
|
||||||
n.check_status = check.status
|
n.check_status = check.status
|
||||||
|
n.error = error
|
||||||
|
n.save()
|
||||||
|
|
||||||
if self.kind == "email" and self.email_verified:
|
def test(self):
|
||||||
ctx = {
|
return self.transport().test()
|
||||||
"check": check,
|
|
||||||
"checks": self.user.check_set.order_by("created"),
|
|
||||||
"now": timezone.now()
|
|
||||||
}
|
|
||||||
emails.alert(self.value, ctx)
|
|
||||||
n.save()
|
|
||||||
elif self.kind == "webhook" and check.status == "down":
|
|
||||||
try:
|
|
||||||
headers = {"User-Agent": "healthchecks.io"}
|
|
||||||
r = requests.get(self.value, timeout=5, headers=headers)
|
|
||||||
n.status = r.status_code
|
|
||||||
except requests.exceptions.Timeout:
|
|
||||||
# Well, we tried
|
|
||||||
pass
|
|
||||||
|
|
||||||
n.save()
|
|
||||||
elif self.kind == "slack":
|
|
||||||
tmpl = "integrations/slack_message.json"
|
|
||||||
text = render_to_string(tmpl, {"check": check})
|
|
||||||
payload = json.loads(text)
|
|
||||||
r = requests.post(self.value, json=payload, timeout=5)
|
|
||||||
|
|
||||||
n.status = r.status_code
|
|
||||||
n.save()
|
|
||||||
elif self.kind == "hipchat":
|
|
||||||
tmpl = "integrations/hipchat_message.html"
|
|
||||||
text = render_to_string(tmpl, {"check": check})
|
|
||||||
payload = {
|
|
||||||
"message": text,
|
|
||||||
"color": "green" if check.status == "up" else "red",
|
|
||||||
}
|
|
||||||
|
|
||||||
r = requests.post(self.value, json=payload, timeout=5)
|
|
||||||
|
|
||||||
n.status = r.status_code
|
|
||||||
n.save()
|
|
||||||
|
|
||||||
elif self.kind == "pd":
|
|
||||||
if check.status == "down":
|
|
||||||
event_type = "trigger"
|
|
||||||
description = "%s is DOWN" % check.name_then_code()
|
|
||||||
else:
|
|
||||||
event_type = "resolve"
|
|
||||||
description = "%s received a ping and is now UP" % \
|
|
||||||
check.name_then_code()
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"service_key": self.value,
|
|
||||||
"incident_key": str(check.code),
|
|
||||||
"event_type": event_type,
|
|
||||||
"description": description,
|
|
||||||
"client": "healthchecks.io",
|
|
||||||
"client_url": settings.SITE_ROOT
|
|
||||||
}
|
|
||||||
|
|
||||||
url = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
|
|
||||||
r = requests.post(url, data=json.dumps(payload), timeout=5)
|
|
||||||
|
|
||||||
n.status = r.status_code
|
|
||||||
n.save()
|
|
||||||
|
|
||||||
elif self.kind == "po":
|
|
||||||
tmpl = "integrations/pushover_message.html"
|
|
||||||
ctx = {
|
|
||||||
"check": check,
|
|
||||||
"down_checks": self.user.check_set.filter(status="down").exclude(code=check.code).order_by("created"),
|
|
||||||
}
|
|
||||||
text = render_to_string(tmpl, ctx).strip()
|
|
||||||
if check.status == "down":
|
|
||||||
title = "%s is DOWN" % check.name_then_code()
|
|
||||||
else:
|
|
||||||
title = "%s is now UP" % check.name_then_code()
|
|
||||||
|
|
||||||
user_key, priority, _ = self.po_value
|
|
||||||
payload = {
|
|
||||||
"token": settings.PUSHOVER_API_TOKEN,
|
|
||||||
"user": user_key,
|
|
||||||
"message": text,
|
|
||||||
"title": title,
|
|
||||||
"html": 1,
|
|
||||||
"priority": priority,
|
|
||||||
}
|
|
||||||
if priority == 2: # Emergency notification
|
|
||||||
payload["retry"] = settings.PUSHOVER_EMERGENCY_RETRY_DELAY
|
|
||||||
payload["expire"] = settings.PUSHOVER_EMERGENCY_EXPIRATION
|
|
||||||
|
|
||||||
url = "https://api.pushover.net/1/messages.json"
|
|
||||||
r = requests.post(url, data=payload, timeout=5)
|
|
||||||
|
|
||||||
n.status = r.status_code
|
|
||||||
n.save()
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def po_value(self):
|
def po_value(self):
|
||||||
@ -236,4 +168,4 @@ class Notification(models.Model):
|
|||||||
check_status = models.CharField(max_length=6)
|
check_status = models.CharField(max_length=6)
|
||||||
channel = models.ForeignKey(Channel)
|
channel = models.ForeignKey(Channel)
|
||||||
created = models.DateTimeField(auto_now_add=True)
|
created = models.DateTimeField(auto_now_add=True)
|
||||||
status = models.IntegerField(default=0)
|
error = models.CharField(max_length=200, blank=True)
|
||||||
|
@ -20,7 +20,7 @@ class NotifyTestCase(BaseTestCase):
|
|||||||
self.channel.save()
|
self.channel.save()
|
||||||
self.channel.checks.add(self.check)
|
self.channel.checks.add(self.check)
|
||||||
|
|
||||||
@patch("hc.api.models.requests.get")
|
@patch("hc.api.transports.requests.get")
|
||||||
def test_webhook(self, mock_get):
|
def test_webhook(self, mock_get):
|
||||||
self._setup_data("webhook", "http://example")
|
self._setup_data("webhook", "http://example")
|
||||||
mock_get.return_value.status_code = 200
|
mock_get.return_value.status_code = 200
|
||||||
@ -30,16 +30,20 @@ class NotifyTestCase(BaseTestCase):
|
|||||||
u"http://example", headers={"User-Agent": "healthchecks.io"},
|
u"http://example", headers={"User-Agent": "healthchecks.io"},
|
||||||
timeout=5)
|
timeout=5)
|
||||||
|
|
||||||
@patch("hc.api.models.requests.get", side_effect=ReadTimeout)
|
@patch("hc.api.transports.requests.get", side_effect=ReadTimeout)
|
||||||
def test_webhooks_handle_timeouts(self, mock_get):
|
def test_webhooks_handle_timeouts(self, mock_get):
|
||||||
self._setup_data("webhook", "http://example")
|
self._setup_data("webhook", "http://example")
|
||||||
self.channel.notify(self.check)
|
self.channel.notify(self.check)
|
||||||
assert Notification.objects.count() == 1
|
|
||||||
|
n = Notification.objects.get()
|
||||||
|
self.assertEqual(n.error, "Connection timed out")
|
||||||
|
|
||||||
def test_email(self):
|
def test_email(self):
|
||||||
self._setup_data("email", "alice@example.org")
|
self._setup_data("email", "alice@example.org")
|
||||||
self.channel.notify(self.check)
|
self.channel.notify(self.check)
|
||||||
assert Notification.objects.count() == 1
|
|
||||||
|
n = Notification.objects.get()
|
||||||
|
self.assertEqual(n.error, "")
|
||||||
|
|
||||||
# And email should have been sent
|
# And email should have been sent
|
||||||
self.assertEqual(len(mail.outbox), 1)
|
self.assertEqual(len(mail.outbox), 1)
|
||||||
@ -48,21 +52,24 @@ class NotifyTestCase(BaseTestCase):
|
|||||||
self._setup_data("email", "alice@example.org", email_verified=False)
|
self._setup_data("email", "alice@example.org", email_verified=False)
|
||||||
self.channel.notify(self.check)
|
self.channel.notify(self.check)
|
||||||
|
|
||||||
assert Notification.objects.count() == 0
|
assert Notification.objects.count() == 1
|
||||||
|
n = Notification.objects.first()
|
||||||
|
self.assertEqual(n.error, "Email not verified")
|
||||||
self.assertEqual(len(mail.outbox), 0)
|
self.assertEqual(len(mail.outbox), 0)
|
||||||
|
|
||||||
@patch("hc.api.models.requests.post")
|
@patch("hc.api.transports.JsonTransport.post")
|
||||||
def test_pd(self, mock_post):
|
def test_pd(self, mock_post):
|
||||||
self._setup_data("pd", "123")
|
self._setup_data("pd", "123")
|
||||||
mock_post.return_value.status_code = 200
|
mock_post.return_value = None
|
||||||
|
|
||||||
self.channel.notify(self.check)
|
self.channel.notify(self.check)
|
||||||
assert Notification.objects.count() == 1
|
assert Notification.objects.count() == 1
|
||||||
|
|
||||||
args, kwargs = mock_post.call_args
|
args, kwargs = mock_post.call_args
|
||||||
assert "trigger" in kwargs["data"]
|
payload = args[1]
|
||||||
|
self.assertEqual(payload["event_type"], "trigger")
|
||||||
|
|
||||||
@patch("hc.api.models.requests.post")
|
@patch("hc.api.transports.requests.post")
|
||||||
def test_slack(self, mock_post):
|
def test_slack(self, mock_post):
|
||||||
self._setup_data("slack", "123")
|
self._setup_data("slack", "123")
|
||||||
mock_post.return_value.status_code = 200
|
mock_post.return_value.status_code = 200
|
||||||
|
145
hc/api/transports.py
Normal file
145
hc/api/transports.py
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
from django.conf import settings
|
||||||
|
from django.template.loader import render_to_string
|
||||||
|
from django.utils import timezone
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from hc.lib import emails
|
||||||
|
|
||||||
|
|
||||||
|
def tmpl(template_name, **ctx):
    """Render a template from ``integrations/`` and strip the result.

    Keyword arguments are passed to the template as its context.
    """
    rendered = render_to_string("integrations/%s" % template_name, ctx)
    return rendered.strip()
|
||||||
|
|
||||||
|
|
||||||
|
class Transport(object):
    """Base class for notification transports.

    A transport wraps a single channel. Subclasses override notify()
    and, where it makes sense, test().
    """

    def __init__(self, channel):
        self.channel = channel

    def notify(self, check):
        """ Send notification about current status of the check.

        This method returns None on success, and error message
        on error.

        """

        # NotImplemented is a comparison sentinel, not an exception;
        # "raise NotImplemented()" fails with TypeError instead.
        raise NotImplementedError()

    def test(self):
        """ Send test message.

        This method returns None on success, and error message
        on error.

        """

        raise NotImplementedError()

    def checks(self):
        """Return the checks of the channel's user, ordered by "created"."""
        return self.channel.user.check_set.order_by("created")
|
||||||
|
|
||||||
|
|
||||||
|
class Email(Transport):
    """Transport that sends alerts to the channel's email address."""

    def notify(self, check):
        """Email an alert about ``check``.

        Returns "Email not verified" without sending anything when the
        channel's address is not verified, and None otherwise.
        """
        if self.channel.email_verified:
            context = dict(
                check=check,
                checks=self.checks(),
                now=timezone.now(),
            )
            emails.alert(self.channel.value, context)
            return None

        return "Email not verified"
|
||||||
|
|
||||||
|
|
||||||
|
class Webhook(Transport):
    """Transport that issues an HTTP GET to the channel's URL."""

    def notify(self, check):
        """Call the webhook if ``check`` is down.

        Returns None on success (or when nothing was sent), and an
        error message on error.
        """
        # Webhook integration only fires when check goes down.
        if check.status != "down":
            return None

        # Webhook transport sends no arguments, so the
        # notify and test actions are the same
        return self.test()

    def test(self):
        """GET the channel's URL with a 5 second timeout.

        Returns None when the response status is 200 or 201, and an
        error message for any other status, a timeout, or a failed
        connection.
        """
        headers = {"User-Agent": "healthchecks.io"}
        try:
            r = requests.get(self.channel.value, timeout=5, headers=headers)
        except requests.exceptions.Timeout:
            # Well, we tried
            return "Connection timed out"
        except requests.exceptions.ConnectionError:
            # Refused connections, DNS failures and the like must be
            # reported as an error message rather than propagate and
            # abort the caller.
            return "Connection failed"

        if r.status_code not in (200, 201):
            return "Received status code %d" % r.status_code

        return None
|
||||||
|
|
||||||
|
|
||||||
|
class JsonTransport(Transport):
    """Base class for transports that POST a JSON payload."""

    def post(self, url, payload):
        """POST ``payload`` as JSON to ``url`` with a 5 second timeout.

        Returns None when the response status is 200 or 201, and an
        error message for any other status, a timeout, or a failed
        connection.
        """
        headers = {"User-Agent": "healthchecks.io"}
        try:
            r = requests.post(url, json=payload, timeout=5, headers=headers)
        except requests.exceptions.Timeout:
            # Same messages as the Webhook transport, so callers see
            # network trouble as an error string, not an exception.
            return "Connection timed out"
        except requests.exceptions.ConnectionError:
            return "Connection failed"

        if r.status_code not in (200, 201):
            return "Received status code %d" % r.status_code

        return None
|
||||||
|
|
||||||
|
|
||||||
|
class Slack(JsonTransport):
    """Transport that posts to the URL stored in the channel's value."""

    def notify(self, check):
        """Render the Slack message template and post it as JSON.

        Returns whatever ``post`` returns.
        """
        # The template renders to JSON text; decode it so it can be
        # sent as the request payload.
        message = json.loads(tmpl("slack_message.json", check=check))
        return self.post(self.channel.value, message)
|
||||||
|
|
||||||
|
|
||||||
|
class HipChat(JsonTransport):
    """Transport that posts to the URL stored in the channel's value."""

    def notify(self, check):
        """Render the HipChat message template and post it.

        The message is coloured green when the check is "up" and red
        otherwise. Returns whatever ``post`` returns.
        """
        body = tmpl("hipchat_message.html", check=check)

        if check.status == "up":
            color = "green"
        else:
            color = "red"

        return self.post(self.channel.value, {"message": body, "color": color})
|
||||||
|
|
||||||
|
|
||||||
|
class PagerDuty(JsonTransport):
    """Transport that creates PagerDuty events for a check."""

    URL = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"

    def notify(self, check):
        """Post a "trigger" or "resolve" event for ``check``.

        A check whose status is "down" triggers an incident; any other
        status resolves it. The check's code is used as the incident
        key. Returns whatever ``post`` returns.
        """
        summary = tmpl("pd_description.html", check=check)

        if check.status == "down":
            event_type = "trigger"
        else:
            event_type = "resolve"

        event = {
            "service_key": self.channel.value,
            "incident_key": str(check.code),
            "event_type": event_type,
            "description": summary,
            "client": "healthchecks.io",
            "client_url": settings.SITE_ROOT
        }
        return self.post(self.URL, event)
|
||||||
|
|
||||||
|
|
||||||
|
class Pushover(JsonTransport):
    """Transport that sends messages through the Pushover API."""

    URL = "https://api.pushover.net/1/messages.json"

    def notify(self, check):
        """Send a Pushover message about ``check``.

        The channel's value is split on "|" into a user key and a
        priority. Returns whatever ``post`` returns.
        """
        # The user's other checks that are currently down are made
        # available to the templates as "down_checks".
        other_down = self.checks().filter(status="down")
        other_down = other_down.exclude(code=check.code)
        context = {"check": check, "down_checks": other_down}

        message = tmpl("pushover_message.html", **context)
        heading = tmpl("pushover_title.html", **context)

        user_key, prio = self.channel.value.split("|")
        payload = dict(
            token=settings.PUSHOVER_API_TOKEN,
            user=user_key,
            message=message,
            title=heading,
            html=1,
            priority=int(prio),
        )

        # Emergency notification
        if prio == "2":
            payload.update(
                retry=settings.PUSHOVER_EMERGENCY_RETRY_DELAY,
                expire=settings.PUSHOVER_EMERGENCY_EXPIRATION,
            )

        # NOTE(review): JsonTransport.post sends the payload as a JSON
        # body -- confirm the Pushover endpoint accepts JSON requests.
        return self.post(self.URL, payload)
|
5
templates/integrations/pd_description.html
Normal file
5
templates/integrations/pd_description.html
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{% if check.status == "down" %}
|
||||||
|
{{ check.name_then_code }} is DOWN
|
||||||
|
{% else %}
|
||||||
|
{{ check.name_then_code }} received a ping and is now UP
|
||||||
|
{% endif %}
|
5
templates/integrations/pushover_title.html
Normal file
5
templates/integrations/pushover_title.html
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{% if check.status == "down" %}
|
||||||
|
{{ check.name_then_code }} is DOWN
|
||||||
|
{% else %}
|
||||||
|
{{ check.name_then_code }} is now UP
|
||||||
|
{% endif %}
|
Loading…
x
Reference in New Issue
Block a user