From d4bbd9d03e939b69d0c087523ef8625f56350785 Mon Sep 17 00:00:00 2001 From: Robert Raposa <rraposa@edx.org> Date: Thu, 18 Mar 2021 10:37:47 -0400 Subject: [PATCH] fix: move ignored error message custom attribute (#27047) The `error_expected` custom attribute used to contain both the class name and the error message. This had the following issues: * Combining data in the same custom attribute limits the ability to query. * The additional error class and message data is only needed for ignored errors, since this data isn't available elsewhere. The following changes were made: * `error_expected` will always have the value True if present. * `error_ignored` no longer exists. * `error_ignored_class` will contain the error module and class for ignored errors. * `error_ignored_message` will contain the error message for ignored errors. ARCHBOM-1708 --- ...logging-and-monitoring-expected-errors.rst | 11 +++-- openedx/core/lib/request_utils.py | 44 ++++++++++++------- openedx/core/lib/tests/test_request_utils.py | 39 ++++++++++------ 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst b/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst index e833f9093f6..0def7a08e6c 100644 --- a/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst +++ b/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst @@ -18,12 +18,15 @@ Monitoring expected errors At a minimum, it is recommended that you add monitoring for any expected errors, including ignored errors. You do this by using the ``EXPECTED_ERRORS`` setting. For details on configuring, see the documentation for `EXPECTED_ERRORS settings and toggles on Readthedocs`_. -By default, this will provide an ``error_expected`` custom attribute for every expected error. This custom attribute can be used in the following ways: +This will provide an ``error_expected`` custom attribute for every expected error. This custom attribute can be used in the following ways. -* Alert conditions can exclude or include expected errors as necessary. -* The value of the custom attribute includes the error name and message, which may help in diagnosing an unexpected scenario. +* Use ``WHERE error_expected IS NULL`` to filter out expected errors from most alert conditions. +* Set up special alerts for an overabundance of expected errors using ``WHERE error_expected IS NOT NULL``. -Additionally, a subset of these errors will also have an ``error_ignored`` custom attribute if the error is configured as ignored. +Additionally, a subset of expected errors that are configured as ignored will also get ``error_ignored_class`` and ``error_ignored_message`` custom attributes. + +* Using New Relic terminology, this extra error class and message data will live on the Transaction and not the TransactionError, because ignored errors won't have a TransactionError. +* Use these additional custom attributes to help diagnose unexpected issues with ignored errors. .. _EXPECTED_ERRORS settings and toggles on Readthedocs: https://edx.readthedocs.io/projects/edx-platform-technical/en/latest/search.html?q=EXPECTED_ERRORS&check_keywords=yes&area=default diff --git a/openedx/core/lib/request_utils.py b/openedx/core/lib/request_utils.py index b471f903e44..5cbc4305a0a 100644 --- a/openedx/core/lib/request_utils.py +++ b/openedx/core/lib/request_utils.py @@ -273,12 +273,13 @@ class ExpectedErrorMiddleware: # This setting is configured of a list of dicts. See setting and toggle annotations for # ``EXPECTED_ERRORS[N]['XXX']`` for details of each item in the dict. # If this setting is a non-empty list, all uncaught errors processed will get a ``checked_error_expected_from`` -# attribute, whether they are expected or not. This can be used to ensure that all uncaught errors are actually -# processed. Those errors that are processed and match a 'MODULE_AND_CLASS' (documented elsewhere), will get an -# ``error_expected`` custom attribute. The value of the custom attribute will include the error class module, name -# and message. Unexpected errors would be errors with ``error_expected IS NULL``. -# .. setting_warning: We use Django Middleware and a DRF custom error handler to find uncaught errors. It is still -# possible that some errors could slip by these mechanisms. +# attribute, whether they are expected or not. Those errors that are processed and match a 'MODULE_AND_CLASS' +# (documented elsewhere), will get an ``error_expected`` custom attribute. Unexpected errors would be errors with +# ``error_expected IS NULL``. For additional diagnostic information for ignored errors, see the +# EXPECTED_ERRORS[N]['IS_IGNORED'] annotation. +# .. setting_warning: We use Django Middleware and a DRF custom error handler to find uncaught errors. Some errors may +# slip through the cracks, like ValidationError. Any error where ``checked_error_expected_from IS NULL`` is +# an error that was not processed. # .. setting_name: EXPECTED_ERRORS[N]['MODULE_AND_CLASS'] # .. setting_default: None @@ -288,16 +289,19 @@ class ExpectedErrorMiddleware: # .. toggle_name: EXPECTED_ERRORS[N]['IS_IGNORED'] # .. toggle_implementation: DjangoSetting # .. toggle_default: True -# .. toggle_description: If True, adds a custom attribute ``error_ignored`` if the error is configured to be ignored -# from monitoring. For example, for ignoring errors in New Relic, see: +# .. toggle_description: Set this to False if the errors are not ignored by monitoring, but only expected, like +# for temporary problems that may take some time to fix. If True, adds the custom attributes +# ``error_ignored_class`` and ``error_ignored_message`` to help diagnose issues with ignored errors, since +# this data is not otherwise available. For example of ignoring errors in New Relic, see: # https://docs.newrelic.com/docs/agents/manage-apm-agents/agent-data/manage-errors-apm-collect-ignore-or-mark-expected/#ignore pylint: disable=line-too-long,useless-suppression -# Ignored errors would be errors where ``error_ignored IS NOT NULL``. +# To query for ignored errors, you would use ``error_ignored_class IS NOT NULL``. # Note: This is defaulted to True because it will be easier for us to detect if True is not the correct value, by # seeing that these errors aren't actually ignored. # .. toggle_warning: At this time, this toggle does not actually configure the error to be ignored. It is meant to match # the ignored error configuration found elsewhere. When monitoring, no errors should ever have the attribute -# ``error_ignored``. If it is found, it means we are stating an error should be ignored when it is not actually -# configured as such, or the configuration is not working. +# ``error_ignored_class``. Only Transactions should have this custom attribute. If found for an error, it means we +# are stating an error should be ignored when it is not actually configured as such, or the configuration is not +# working. # .. toggle_use_cases: opt_out # .. toggle_creation_date: 2021-03-11 @@ -468,15 +472,23 @@ def _log_and_monitor_expected_errors(request, exception, caller): if module_and_class not in expected_error_settings_dict: return - exception_message = repr(str(exception)) - module_and_class_with_message = f'{module_and_class}({exception_message})' - set_custom_attribute('error_expected', module_and_class_with_message) + exception_message = str(exception) + set_custom_attribute('error_expected', True) expected_error_settings = expected_error_settings_dict[module_and_class] if expected_error_settings['is_ignored']: - set_custom_attribute('error_ignored', True) + # Additional error details are needed for ignored errors, because they are otherwise + # not available by our monitoring system, because they have been ignored. + set_custom_attribute('error_ignored_class', module_and_class) + set_custom_attribute('error_ignored_message', exception_message) if expected_error_settings['log_error']: exc_info = exception if expected_error_settings['log_stack_trace'] else None request_path = getattr(request, 'path', 'request-path-unknown') - log.info('Expected error %s seen for path %s', module_and_class_with_message, request_path, exc_info=exc_info) + log.info( + 'Expected error %s: %s: seen for path %s', + module_and_class, + exception_message, + request_path, + exc_info=exc_info, + ) diff --git a/openedx/core/lib/tests/test_request_utils.py b/openedx/core/lib/tests/test_request_utils.py index d6b53f058cd..9e947641053 100644 --- a/openedx/core/lib/tests/test_request_utils.py +++ b/openedx/core/lib/tests/test_request_utils.py @@ -399,8 +399,9 @@ class TestExpectedErrorMiddleware(unittest.TestCase): mock_set_custom_attribute.assert_has_calls( [ call('checked_error_expected_from', 'middleware'), - call('error_expected', "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')"), - call('error_ignored', True) + call('error_expected', True), + call('error_ignored_class', 'openedx.core.lib.tests.test_request_utils.CustomError1'), + call('error_ignored_message', 'Test failure'), ], any_order=True ) @@ -426,8 +427,9 @@ class TestExpectedErrorMiddleware(unittest.TestCase): expected_calls = [ call('checked_error_expected_from', 'middleware'), - call('error_expected', "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')"), - call('error_ignored', True), + call('error_expected', True), + call('error_ignored_class', 'openedx.core.lib.tests.test_request_utils.CustomError1'), + call('error_ignored_message', 'Test failure'), call('checked_error_expected_from', 'middleware'), ] if use_same_exception: @@ -448,7 +450,11 @@ class TestExpectedErrorMiddleware(unittest.TestCase): mock_exception = Exception("Oops") ExpectedErrorMiddleware('mock-response').process_exception(self.mock_request, mock_exception) - mock_set_custom_attribute.assert_has_calls([call('error_expected', "Exception('Oops')")]) + mock_set_custom_attribute.assert_has_calls([ + call('error_expected', True), + call('error_ignored_class', 'Exception'), + call('error_ignored_message', 'Oops'), + ]) @patch('openedx.core.lib.request_utils.set_custom_attribute') @patch('openedx.core.lib.request_utils.log') @@ -461,8 +467,11 @@ class TestExpectedErrorMiddleware(unittest.TestCase): def test_process_exception_expected_error_with_overrides( self, log_error, log_stack_trace, mock_logger, mock_set_custom_attribute, ): + expected_class = 'openedx.core.lib.tests.test_request_utils.CustomError1' + expected_message = 'Test failure' + with override_settings(EXPECTED_ERRORS=[{ - 'MODULE_AND_CLASS': 'openedx.core.lib.tests.test_request_utils.CustomError1', + 'MODULE_AND_CLASS': expected_class, 'IS_IGNORED': False, 'LOG_ERROR': log_error, 'LOG_STACK_TRACE': log_stack_trace, @@ -470,18 +479,17 @@ class TestExpectedErrorMiddleware(unittest.TestCase): }]): ExpectedErrorMiddleware('mock-response').process_exception(self.mock_request, self.mock_exception) - expected_class_with_message = "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')" if log_error: exc_info = self.mock_exception if log_stack_trace else None mock_logger.info.assert_called_once_with( - 'Expected error %s seen for path %s', expected_class_with_message, '/test', exc_info=exc_info + 'Expected error %s: %s: seen for path %s', expected_class, expected_message, '/test', exc_info=exc_info ) else: mock_logger.info.assert_not_called() mock_set_custom_attribute.assert_has_calls( [ call('checked_error_expected_from', 'middleware'), - call('error_expected', expected_class_with_message), + call('error_expected', True), ], any_order=True ) @@ -520,18 +528,21 @@ class TestExpectedErrorExceptionHandler(unittest.TestCase): expected_request_path = 'request-path-unknown' expected_error_exception_handler(self.mock_exception, mock_context) - expected_class_with_message = "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')" + expected_class = 'openedx.core.lib.tests.test_request_utils.CustomError1' + expected_message = 'Test failure' mock_logger.info.assert_called_once_with( - 'Expected error %s seen for path %s', - expected_class_with_message, + 'Expected error %s: %s: seen for path %s', + expected_class, + expected_message, expected_request_path, exc_info=None, ) mock_set_custom_attribute.assert_has_calls( [ call('checked_error_expected_from', 'drf'), - call('error_expected', expected_class_with_message), - call('error_ignored', True) + call('error_expected', True), + call('error_ignored_class', expected_class), + call('error_ignored_message', expected_message), ], any_order=True ) -- GitLab