forked from GithubBackups/healthchecks
Move notification logic to hc.api.transports. Don't use "paused" state for checks.
This commit is contained in:
parent
7b475118f9
commit
21a042aa16
@ -182,7 +182,7 @@ class NotificationsAdmin(admin.ModelAdmin):
|
||||
search_fields = ["owner__name", "owner__code", "channel__value"]
|
||||
list_select_related = ("owner", "channel")
|
||||
list_display = ("id", "created", "check_status", "check_name",
|
||||
"channel_kind", "channel_value", "status")
|
||||
"channel_kind", "channel_value")
|
||||
list_filter = ("created", "check_status", "channel__kind")
|
||||
|
||||
def check_name(self, obj):
|
||||
|
@ -39,27 +39,17 @@ class Command(BaseCommand):
|
||||
Return False if no checks need to be processed.
|
||||
|
||||
"""
|
||||
|
||||
# Save the new status. If sendalerts crashes,
|
||||
# it won't process this check again.
|
||||
check.status = check.get_status()
|
||||
check.save()
|
||||
|
||||
tmpl = "\nSending alert, status=%s, code=%s\n"
|
||||
self.stdout.write(tmpl % (check.status, check.code))
|
||||
|
||||
try:
|
||||
check.send_alert()
|
||||
except:
|
||||
# Catch EVERYTHING. If we crash here, what can happen is:
|
||||
# - the sendalerts command will crash
|
||||
# - supervisor will respawn sendalerts command
|
||||
# - sendalerts will try same thing again, resulting in
|
||||
# infinite loop
|
||||
# So instead we catch and log all exceptions, and mark
|
||||
# the checks as paused so they are not retried.
|
||||
logger.error("Could not alert %s" % check.code, exc_info=True)
|
||||
check.status = "paused"
|
||||
finally:
|
||||
check.save()
|
||||
connection.close()
|
||||
|
||||
connection.close()
|
||||
return True
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
24
hc/api/migrations/0022_auto_20160130_2042.py
Normal file
24
hc/api/migrations/0022_auto_20160130_2042.py
Normal file
@ -0,0 +1,24 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.9 on 2016-01-30 20:42
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Drop Notification.status and add a Notification.error text field."""

    dependencies = [("api", "0021_ping_n")]

    operations = [
        # The integer status field is removed outright...
        migrations.RemoveField(model_name="notification", name="status"),
        # ...and a short, optional text field is added in its place.
        migrations.AddField(
            model_name="notification",
            name="error",
            field=models.CharField(blank=True, max_length=200),
        ),
    ]
|
124
hc/api/models.py
124
hc/api/models.py
@ -1,17 +1,15 @@
|
||||
# coding: utf-8
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import uuid
|
||||
from datetime import timedelta as td
|
||||
|
||||
import requests
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.db import models
|
||||
from django.template.loader import render_to_string
|
||||
from django.utils import timezone
|
||||
from hc.api import transports
|
||||
from hc.lib import emails
|
||||
|
||||
STATUSES = (
|
||||
@ -125,103 +123,37 @@ class Channel(models.Model):
|
||||
verify_link = settings.SITE_ROOT + verify_link
|
||||
emails.verify_email(self.value, {"verify_link": verify_link})
|
||||
|
||||
@property
def transport(self):
    """Build the transport object that delivers messages for this channel.

    Returns:
        An instance of the ``hc.api.transports`` class that matches
        ``self.kind``, constructed with this channel.

    Raises:
        NotImplementedError: if ``self.kind`` is not a known channel kind.
    """
    if self.kind == "email":
        return transports.Email(self)
    elif self.kind == "webhook":
        return transports.Webhook(self)
    elif self.kind == "slack":
        return transports.Slack(self)
    elif self.kind == "hipchat":
        return transports.HipChat(self)
    elif self.kind == "pd":
        return transports.PagerDuty(self)
    elif self.kind == "po":
        # Like every other transport, Pushover must receive the channel.
        return transports.Pushover(self)
    else:
        # NotImplemented is a comparison sentinel and cannot be raised;
        # NotImplementedError is the exception class.
        raise NotImplementedError("Unknown channel kind: %s" % self.kind)
|
||||
|
||||
def notify(self, check):
|
||||
# Make up to 3 attempts, stopping at the first success
|
||||
for x in range(0, 3):
|
||||
error = self.transport.notify(check) or ""
|
||||
if error == "":
|
||||
break # Success!
|
||||
|
||||
n = Notification(owner=check, channel=self)
|
||||
n.check_status = check.status
|
||||
|
||||
if self.kind == "email" and self.email_verified:
|
||||
ctx = {
|
||||
"check": check,
|
||||
"checks": self.user.check_set.order_by("created"),
|
||||
"now": timezone.now()
|
||||
}
|
||||
emails.alert(self.value, ctx)
|
||||
n.save()
|
||||
elif self.kind == "webhook" and check.status == "down":
|
||||
try:
|
||||
headers = {"User-Agent": "healthchecks.io"}
|
||||
r = requests.get(self.value, timeout=5, headers=headers)
|
||||
n.status = r.status_code
|
||||
except requests.exceptions.Timeout:
|
||||
# Well, we tried
|
||||
pass
|
||||
|
||||
n.save()
|
||||
elif self.kind == "slack":
|
||||
tmpl = "integrations/slack_message.json"
|
||||
text = render_to_string(tmpl, {"check": check})
|
||||
payload = json.loads(text)
|
||||
r = requests.post(self.value, json=payload, timeout=5)
|
||||
|
||||
n.status = r.status_code
|
||||
n.save()
|
||||
elif self.kind == "hipchat":
|
||||
tmpl = "integrations/hipchat_message.html"
|
||||
text = render_to_string(tmpl, {"check": check})
|
||||
payload = {
|
||||
"message": text,
|
||||
"color": "green" if check.status == "up" else "red",
|
||||
}
|
||||
|
||||
r = requests.post(self.value, json=payload, timeout=5)
|
||||
|
||||
n.status = r.status_code
|
||||
n.error = error
|
||||
n.save()
|
||||
|
||||
elif self.kind == "pd":
|
||||
if check.status == "down":
|
||||
event_type = "trigger"
|
||||
description = "%s is DOWN" % check.name_then_code()
|
||||
else:
|
||||
event_type = "resolve"
|
||||
description = "%s received a ping and is now UP" % \
|
||||
check.name_then_code()
|
||||
|
||||
payload = {
|
||||
"service_key": self.value,
|
||||
"incident_key": str(check.code),
|
||||
"event_type": event_type,
|
||||
"description": description,
|
||||
"client": "healthchecks.io",
|
||||
"client_url": settings.SITE_ROOT
|
||||
}
|
||||
|
||||
url = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
|
||||
r = requests.post(url, data=json.dumps(payload), timeout=5)
|
||||
|
||||
n.status = r.status_code
|
||||
n.save()
|
||||
|
||||
elif self.kind == "po":
|
||||
tmpl = "integrations/pushover_message.html"
|
||||
ctx = {
|
||||
"check": check,
|
||||
"down_checks": self.user.check_set.filter(status="down").exclude(code=check.code).order_by("created"),
|
||||
}
|
||||
text = render_to_string(tmpl, ctx).strip()
|
||||
if check.status == "down":
|
||||
title = "%s is DOWN" % check.name_then_code()
|
||||
else:
|
||||
title = "%s is now UP" % check.name_then_code()
|
||||
|
||||
user_key, priority, _ = self.po_value
|
||||
payload = {
|
||||
"token": settings.PUSHOVER_API_TOKEN,
|
||||
"user": user_key,
|
||||
"message": text,
|
||||
"title": title,
|
||||
"html": 1,
|
||||
"priority": priority,
|
||||
}
|
||||
if priority == 2: # Emergency notification
|
||||
payload["retry"] = settings.PUSHOVER_EMERGENCY_RETRY_DELAY
|
||||
payload["expire"] = settings.PUSHOVER_EMERGENCY_EXPIRATION
|
||||
|
||||
url = "https://api.pushover.net/1/messages.json"
|
||||
r = requests.post(url, data=payload, timeout=5)
|
||||
|
||||
n.status = r.status_code
|
||||
n.save()
|
||||
def test(self):
    """Send a test message through this channel's transport.

    Returns whatever the transport's ``test()`` method returns.
    """
    # ``transport`` is a property, so it already evaluates to the
    # transport object; calling it would raise TypeError.
    return self.transport.test()
|
||||
|
||||
@property
|
||||
def po_value(self):
|
||||
@ -236,4 +168,4 @@ class Notification(models.Model):
|
||||
check_status = models.CharField(max_length=6)
|
||||
channel = models.ForeignKey(Channel)
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
status = models.IntegerField(default=0)
|
||||
error = models.CharField(max_length=200, blank=True)
|
||||
|
@ -20,7 +20,7 @@ class NotifyTestCase(BaseTestCase):
|
||||
self.channel.save()
|
||||
self.channel.checks.add(self.check)
|
||||
|
||||
@patch("hc.api.models.requests.get")
|
||||
@patch("hc.api.transports.requests.get")
|
||||
def test_webhook(self, mock_get):
|
||||
self._setup_data("webhook", "http://example")
|
||||
mock_get.return_value.status_code = 200
|
||||
@ -30,16 +30,20 @@ class NotifyTestCase(BaseTestCase):
|
||||
u"http://example", headers={"User-Agent": "healthchecks.io"},
|
||||
timeout=5)
|
||||
|
||||
@patch("hc.api.models.requests.get", side_effect=ReadTimeout)
|
||||
@patch("hc.api.transports.requests.get", side_effect=ReadTimeout)
|
||||
def test_webhooks_handle_timeouts(self, mock_get):
|
||||
self._setup_data("webhook", "http://example")
|
||||
self.channel.notify(self.check)
|
||||
assert Notification.objects.count() == 1
|
||||
|
||||
n = Notification.objects.get()
|
||||
self.assertEqual(n.error, "Connection timed out")
|
||||
|
||||
def test_email(self):
|
||||
self._setup_data("email", "alice@example.org")
|
||||
self.channel.notify(self.check)
|
||||
assert Notification.objects.count() == 1
|
||||
|
||||
n = Notification.objects.get()
|
||||
self.assertEqual(n.error, "")
|
||||
|
||||
# And email should have been sent
|
||||
self.assertEqual(len(mail.outbox), 1)
|
||||
@ -48,21 +52,24 @@ class NotifyTestCase(BaseTestCase):
|
||||
self._setup_data("email", "alice@example.org", email_verified=False)
|
||||
self.channel.notify(self.check)
|
||||
|
||||
assert Notification.objects.count() == 0
|
||||
assert Notification.objects.count() == 1
|
||||
n = Notification.objects.first()
|
||||
self.assertEqual(n.error, "Email not verified")
|
||||
self.assertEqual(len(mail.outbox), 0)
|
||||
|
||||
@patch("hc.api.models.requests.post")
|
||||
@patch("hc.api.transports.JsonTransport.post")
|
||||
def test_pd(self, mock_post):
|
||||
self._setup_data("pd", "123")
|
||||
mock_post.return_value.status_code = 200
|
||||
mock_post.return_value = None
|
||||
|
||||
self.channel.notify(self.check)
|
||||
assert Notification.objects.count() == 1
|
||||
|
||||
args, kwargs = mock_post.call_args
|
||||
assert "trigger" in kwargs["data"]
|
||||
payload = args[1]
|
||||
self.assertEqual(payload["event_type"], "trigger")
|
||||
|
||||
@patch("hc.api.models.requests.post")
|
||||
@patch("hc.api.transports.requests.post")
|
||||
def test_slack(self, mock_post):
|
||||
self._setup_data("slack", "123")
|
||||
mock_post.return_value.status_code = 200
|
||||
|
145
hc/api/transports.py
Normal file
145
hc/api/transports.py
Normal file
@ -0,0 +1,145 @@
|
||||
from django.conf import settings
|
||||
from django.template.loader import render_to_string
|
||||
from django.utils import timezone
|
||||
import json
|
||||
import requests
|
||||
|
||||
from hc.lib import emails
|
||||
|
||||
|
||||
def tmpl(template_name, **ctx):
    """Render a template from the "integrations" directory.

    The keyword arguments become the template context. The rendered text
    is returned with leading and trailing whitespace stripped.
    """
    rendered = render_to_string("integrations/%s" % template_name, ctx)
    return rendered.strip()
|
||||
|
||||
|
||||
class Transport(object):
    """Base class for notification transports.

    A transport wraps a single channel. Subclasses override ``notify``
    (and optionally ``test``) to deliver a message for that channel.
    """

    def __init__(self, channel):
        """Remember the channel this transport delivers for."""
        self.channel = channel

    def notify(self, check):
        """ Send notification about current status of the check.

        This method returns None on success, and error message
        on error.

        """

        # NotImplemented is a comparison sentinel, not an exception;
        # NotImplementedError is what an abstract method should raise.
        raise NotImplementedError()

    def test(self):
        """ Send test message.

        This method returns None on success, and error message
        on error.

        """

        raise NotImplementedError()

    def checks(self):
        """Return the channel owner's checks, ordered by "created"."""
        return self.channel.user.check_set.order_by("created")
|
||||
|
||||
|
||||
class Email(Transport):
    """Transport that delivers alerts by email to the channel's address."""

    def notify(self, check):
        """Email an alert about ``check``.

        Returns "Email not verified" without sending anything when the
        channel's address has not been verified; otherwise sends the alert
        and returns None.
        """
        channel = self.channel
        if not channel.email_verified:
            return "Email not verified"

        context = {
            "check": check,
            "checks": self.checks(),
            "now": timezone.now(),
        }
        emails.alert(channel.value, context)
|
||||
|
||||
|
||||
class Webhook(Transport):
    """Transport that issues a GET request to the channel's URL."""

    def notify(self, check):
        """Call the webhook if ``check`` is down.

        Returns None when nothing is sent or the call succeeds, and an
        error message otherwise.
        """
        # Webhook integration only fires when check goes down.
        if check.status != "down":
            return None

        # Webhook transport sends no arguments, so the
        # notify and test actions are the same
        return self.test()

    def test(self):
        """GET the channel's URL with a 5 second timeout.

        Returns None on a 200/201 response, and an error message on any
        other status code, on timeout, or on connection failure.
        """
        headers = {"User-Agent": "healthchecks.io"}
        try:
            r = requests.get(self.channel.value, timeout=5, headers=headers)
        except requests.exceptions.Timeout:
            # Well, we tried. Checked before ConnectionError so that
            # connect timeouts keep reporting as timeouts.
            return "Connection timed out"
        except requests.exceptions.ConnectionError:
            # Refused connections, DNS failures and the like are reported
            # as an error message, per the notify()/test() contract,
            # instead of propagating to the caller.
            return "Connection failed"

        if r.status_code not in (200, 201):
            return "Received status code %d" % r.status_code
|
||||
|
||||
|
||||
class JsonTransport(Transport):
    """Base class for transports that POST a JSON payload."""

    def post(self, url, payload):
        """POST ``payload`` as JSON to ``url`` with a 5 second timeout.

        Returns None on a 200/201 response, and an error message on any
        other status code, on timeout, or on connection failure.
        """
        headers = {"User-Agent": "healthchecks.io"}
        try:
            r = requests.post(url, json=payload, timeout=5, headers=headers)
        except requests.exceptions.Timeout:
            # Checked before ConnectionError so that connect timeouts are
            # reported as timeouts, matching the Webhook transport.
            return "Connection timed out"
        except requests.exceptions.ConnectionError:
            # Report network failures as an error message rather than
            # letting the exception escape from notify().
            return "Connection failed"

        if r.status_code not in (200, 201):
            return "Received status code %d" % r.status_code
|
||||
|
||||
|
||||
class Slack(JsonTransport):
    """Transport that posts a rendered JSON message to the channel's URL."""

    def notify(self, check):
        """Render slack_message.json for ``check`` and POST it.

        Returns the result of ``post``.
        """
        # The template renders to JSON text; parse it into the payload.
        payload = json.loads(tmpl("slack_message.json", check=check))
        return self.post(self.channel.value, payload)
|
||||
|
||||
|
||||
class HipChat(JsonTransport):
    """Transport that posts a rendered HTML message to the channel's URL."""

    def notify(self, check):
        """Render hipchat_message.html for ``check`` and POST it.

        The message colour is green when the check is up and red
        otherwise. Returns the result of ``post``.
        """
        message = tmpl("hipchat_message.html", check=check)

        if check.status == "up":
            color = "green"
        else:
            color = "red"

        return self.post(self.channel.value, {"message": message, "color": color})
|
||||
|
||||
|
||||
class PagerDuty(JsonTransport):
    """Transport that creates PagerDuty events for a check."""

    URL = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"

    def notify(self, check):
        """POST a "trigger" event when ``check`` is down, else "resolve".

        Returns the result of ``post``.
        """
        description = tmpl("pd_description.html", check=check)

        payload = {}
        payload["service_key"] = self.channel.value
        # The check's code is the incident key for both event types.
        payload["incident_key"] = str(check.code)
        if check.status == "down":
            payload["event_type"] = "trigger"
        else:
            payload["event_type"] = "resolve"
        payload["description"] = description
        payload["client"] = "healthchecks.io"
        payload["client_url"] = settings.SITE_ROOT

        return self.post(self.URL, payload)
|
||||
|
||||
|
||||
class Pushover(JsonTransport):
    """Transport that sends Pushover messages for a check."""

    URL = "https://api.pushover.net/1/messages.json"

    def notify(self, check):
        """Render the Pushover title and message for ``check`` and POST them.

        The channel value is split on "|" into a user key and a priority.
        Returns the result of ``post``.
        """
        # The owner's other checks that are currently down.
        down_checks = self.checks().filter(status="down")
        down_checks = down_checks.exclude(code=check.code)

        context = dict(check=check, down_checks=down_checks)
        message = tmpl("pushover_message.html", **context)
        title = tmpl("pushover_title.html", **context)

        user_key, prio = self.channel.value.split("|")
        payload = {
            "token": settings.PUSHOVER_API_TOKEN,
            "user": user_key,
            "message": message,
            "title": title,
            "html": 1,
            "priority": int(prio),
        }

        # Emergency notification
        if prio == "2":
            payload.update(
                retry=settings.PUSHOVER_EMERGENCY_RETRY_DELAY,
                expire=settings.PUSHOVER_EMERGENCY_EXPIRATION,
            )

        return self.post(self.URL, payload)
|
5
templates/integrations/pd_description.html
Normal file
5
templates/integrations/pd_description.html
Normal file
@ -0,0 +1,5 @@
|
||||
{% if check.status == "down" %}
|
||||
{{ check.name_then_code }} is DOWN
|
||||
{% else %}
|
||||
{{ check.name_then_code }} received a ping and is now UP
|
||||
{% endif %}
|
5
templates/integrations/pushover_title.html
Normal file
5
templates/integrations/pushover_title.html
Normal file
@ -0,0 +1,5 @@
|
||||
{% if check.status == "down" %}
|
||||
{{ check.name_then_code }} is DOWN
|
||||
{% else %}
|
||||
{{ check.name_then_code }} is now UP
|
||||
{% endif %}
|
Loading…
x
Reference in New Issue
Block a user