From d4bbd9d03e939b69d0c087523ef8625f56350785 Mon Sep 17 00:00:00 2001
From: Robert Raposa <rraposa@edx.org>
Date: Thu, 18 Mar 2021 10:37:47 -0400
Subject: [PATCH] fix: move ignored error message custom attribute (#27047)

The `error_expected` custom attribute used to contain
both the class name and the error message. This had
the following issues:

* Combining data in the same custom attribute limits
the ability to query.
* The additional error class and message data is only
needed for ignored errors, since this data isn't
available elsewhere.

The following changes were made:
* `error_expected` will always have the value True
if present.
* `error_ignored` no longer exists.
* `error_ignored_class` will contain the error module
and class for ignored errors.
* `error_ignored_message` will contain the error message
for ignored errors.

ARCHBOM-1708
---
 ...logging-and-monitoring-expected-errors.rst | 11 +++--
 openedx/core/lib/request_utils.py             | 44 ++++++++++++-------
 openedx/core/lib/tests/test_request_utils.py  | 39 ++++++++++------
 3 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst b/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst
index e833f9093f6..0def7a08e6c 100644
--- a/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst
+++ b/openedx/core/lib/docs/how_tos/logging-and-monitoring-expected-errors.rst
@@ -18,12 +18,15 @@ Monitoring expected errors
 
 At a minimum, it is recommended that you add monitoring for any expected errors, including ignored errors. You do this by using the ``EXPECTED_ERRORS`` setting. For details on configuring, see the documentation for `EXPECTED_ERRORS settings and toggles on Readthedocs`_.
 
-By default, this will provide an ``error_expected`` custom attribute for every expected error. This custom attribute can be used in the following ways:
+This will provide an ``error_expected`` custom attribute for every expected error. This custom attribute can be used in the following ways.
 
-* Alert conditions can exclude or include expected errors as necessary.
-* The value of the custom attribute includes the error name and message, which may help in diagnosing an unexpected scenario.
+* Use ``WHERE error_expected IS NULL`` to filter out expected errors from most alert conditions.
+* Set up special alerts for an overabundance of expected errors using ``WHERE error_expected IS NOT NULL``.
 
-Additionally, a subset of these errors will also have an ``error_ignored`` custom attribute if the error is configured as ignored.
+Additionally, a subset of expected errors that are configured as ignored will also get ``error_ignored_class`` and ``error_ignored_message`` custom attributes.
+
+* Using New Relic terminology, this extra error class and message data will live on the Transaction and not the TransactionError, because ignored errors won't have a TransactionError.
+* Use these additional custom attributes to help diagnose unexpected issues with ignored errors.
 
 .. _EXPECTED_ERRORS settings and toggles on Readthedocs: https://edx.readthedocs.io/projects/edx-platform-technical/en/latest/search.html?q=EXPECTED_ERRORS&check_keywords=yes&area=default
 
diff --git a/openedx/core/lib/request_utils.py b/openedx/core/lib/request_utils.py
index b471f903e44..5cbc4305a0a 100644
--- a/openedx/core/lib/request_utils.py
+++ b/openedx/core/lib/request_utils.py
@@ -273,12 +273,13 @@ class ExpectedErrorMiddleware:
 #     This setting is configured of a list of dicts. See setting and toggle annotations for
 #     ``EXPECTED_ERRORS[N]['XXX']`` for details of each item in the dict.
 #     If this setting is a non-empty list, all uncaught errors processed will get a ``checked_error_expected_from``
-#     attribute, whether they are expected or not. This can be used to ensure that all uncaught errors are actually
-#     processed. Those errors that are processed and match a 'MODULE_AND_CLASS' (documented elsewhere), will get an
-#     ``error_expected`` custom attribute. The value of the custom attribute will include the error class module, name
-#     and message. Unexpected errors would be errors with ``error_expected IS NULL``.
-# .. setting_warning: We use Django Middleware and a DRF custom error handler to find uncaught errors. It is still
-#     possible that some errors could slip by these mechanisms.
+#     attribute, whether they are expected or not. Those errors that are processed and match a 'MODULE_AND_CLASS'
+#     (documented elsewhere), will get an ``error_expected`` custom attribute. Unexpected errors would be errors with
+#     ``error_expected IS NULL``. For additional diagnostic information for ignored errors, see the
+#     EXPECTED_ERRORS[N]['IS_IGNORED'] annotation.
+# .. setting_warning: We use Django Middleware and a DRF custom error handler to find uncaught errors. Some errors may
+#     slip through the cracks, like ValidationError. Any error where ``checked_error_expected_from IS NULL`` is
+#     an error that was not processed.
 
 # .. setting_name: EXPECTED_ERRORS[N]['MODULE_AND_CLASS']
 # .. setting_default: None
@@ -288,16 +289,19 @@ class ExpectedErrorMiddleware:
 # .. toggle_name: EXPECTED_ERRORS[N]['IS_IGNORED']
 # .. toggle_implementation: DjangoSetting
 # .. toggle_default: True
-# .. toggle_description: If True, adds a custom attribute ``error_ignored`` if the error is configured to be ignored
-#      from monitoring. For example, for ignoring errors in New Relic, see:
+# .. toggle_description: Set this to False if the errors are not ignored by monitoring, but only expected, like
+#      for temporary problems that may take some time to fix. If True, adds the custom attributes
+#      ``error_ignored_class`` and ``error_ignored_message`` to help diagnose issues with ignored errors, since
+#      this data is not otherwise available. For example of ignoring errors in New Relic, see:
 #      https://docs.newrelic.com/docs/agents/manage-apm-agents/agent-data/manage-errors-apm-collect-ignore-or-mark-expected/#ignore  pylint: disable=line-too-long,useless-suppression
-#      Ignored errors would be errors where ``error_ignored IS NOT NULL``.
+#      To query for ignored errors, you would use ``error_ignored_class IS NOT NULL``.
 #      Note: This is defaulted to True because it will be easier for us to detect if True is not the correct value, by
 #      seeing that these errors aren't actually ignored.
 # .. toggle_warning: At this time, this toggle does not actually configure the error to be ignored. It is meant to match
 #     the ignored error configuration found elsewhere. When monitoring, no errors should ever have the attribute
-#     ``error_ignored``. If it is found, it means we are stating an error should be ignored when it is not actually
-#     configured as such, or the configuration is not working.
+#     ``error_ignored_class``. Only Transactions should have this custom attribute. If found for an error, it means we
+#     are stating an error should be ignored when it is not actually configured as such, or the configuration is not
+#     working.
 # .. toggle_use_cases: opt_out
 # .. toggle_creation_date: 2021-03-11
 
@@ -468,15 +472,23 @@ def _log_and_monitor_expected_errors(request, exception, caller):
     if module_and_class not in expected_error_settings_dict:
         return
 
-    exception_message = repr(str(exception))
-    module_and_class_with_message = f'{module_and_class}({exception_message})'
-    set_custom_attribute('error_expected', module_and_class_with_message)
+    exception_message = str(exception)
+    set_custom_attribute('error_expected', True)
 
     expected_error_settings = expected_error_settings_dict[module_and_class]
     if expected_error_settings['is_ignored']:
-        set_custom_attribute('error_ignored', True)
+        # Additional error details are needed for ignored errors, because they are otherwise
+        # not available by our monitoring system, because they have been ignored.
+        set_custom_attribute('error_ignored_class', module_and_class)
+        set_custom_attribute('error_ignored_message', exception_message)
 
     if expected_error_settings['log_error']:
         exc_info = exception if expected_error_settings['log_stack_trace'] else None
         request_path = getattr(request, 'path', 'request-path-unknown')
-        log.info('Expected error %s seen for path %s', module_and_class_with_message, request_path, exc_info=exc_info)
+        log.info(
+            'Expected error %s: %s: seen for path %s',
+            module_and_class,
+            exception_message,
+            request_path,
+            exc_info=exc_info,
+        )
diff --git a/openedx/core/lib/tests/test_request_utils.py b/openedx/core/lib/tests/test_request_utils.py
index d6b53f058cd..9e947641053 100644
--- a/openedx/core/lib/tests/test_request_utils.py
+++ b/openedx/core/lib/tests/test_request_utils.py
@@ -399,8 +399,9 @@ class TestExpectedErrorMiddleware(unittest.TestCase):
         mock_set_custom_attribute.assert_has_calls(
             [
                 call('checked_error_expected_from', 'middleware'),
-                call('error_expected', "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')"),
-                call('error_ignored', True)
+                call('error_expected', True),
+                call('error_ignored_class', 'openedx.core.lib.tests.test_request_utils.CustomError1'),
+                call('error_ignored_message', 'Test failure'),
             ],
             any_order=True
         )
@@ -426,8 +427,9 @@ class TestExpectedErrorMiddleware(unittest.TestCase):
 
         expected_calls = [
             call('checked_error_expected_from', 'middleware'),
-            call('error_expected', "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')"),
-            call('error_ignored', True),
+            call('error_expected', True),
+            call('error_ignored_class', 'openedx.core.lib.tests.test_request_utils.CustomError1'),
+            call('error_ignored_message', 'Test failure'),
             call('checked_error_expected_from', 'middleware'),
         ]
         if use_same_exception:
@@ -448,7 +450,11 @@ class TestExpectedErrorMiddleware(unittest.TestCase):
         mock_exception = Exception("Oops")
         ExpectedErrorMiddleware('mock-response').process_exception(self.mock_request, mock_exception)
 
-        mock_set_custom_attribute.assert_has_calls([call('error_expected', "Exception('Oops')")])
+        mock_set_custom_attribute.assert_has_calls([
+            call('error_expected', True),
+            call('error_ignored_class', 'Exception'),
+            call('error_ignored_message', 'Oops'),
+        ])
 
     @patch('openedx.core.lib.request_utils.set_custom_attribute')
     @patch('openedx.core.lib.request_utils.log')
@@ -461,8 +467,11 @@ class TestExpectedErrorMiddleware(unittest.TestCase):
     def test_process_exception_expected_error_with_overrides(
         self, log_error, log_stack_trace, mock_logger, mock_set_custom_attribute,
     ):
+        expected_class = 'openedx.core.lib.tests.test_request_utils.CustomError1'
+        expected_message = 'Test failure'
+
         with override_settings(EXPECTED_ERRORS=[{
-            'MODULE_AND_CLASS': 'openedx.core.lib.tests.test_request_utils.CustomError1',
+            'MODULE_AND_CLASS': expected_class,
             'IS_IGNORED': False,
             'LOG_ERROR': log_error,
             'LOG_STACK_TRACE': log_stack_trace,
@@ -470,18 +479,17 @@ class TestExpectedErrorMiddleware(unittest.TestCase):
         }]):
             ExpectedErrorMiddleware('mock-response').process_exception(self.mock_request, self.mock_exception)
 
-        expected_class_with_message = "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')"
         if log_error:
             exc_info = self.mock_exception if log_stack_trace else None
             mock_logger.info.assert_called_once_with(
-                'Expected error %s seen for path %s', expected_class_with_message, '/test', exc_info=exc_info
+                'Expected error %s: %s: seen for path %s', expected_class, expected_message, '/test', exc_info=exc_info
             )
         else:
             mock_logger.info.assert_not_called()
         mock_set_custom_attribute.assert_has_calls(
             [
                 call('checked_error_expected_from', 'middleware'),
-                call('error_expected', expected_class_with_message),
+                call('error_expected', True),
             ],
             any_order=True
         )
@@ -520,18 +528,21 @@ class TestExpectedErrorExceptionHandler(unittest.TestCase):
             expected_request_path = 'request-path-unknown'
         expected_error_exception_handler(self.mock_exception, mock_context)
 
-        expected_class_with_message = "openedx.core.lib.tests.test_request_utils.CustomError1('Test failure')"
+        expected_class = 'openedx.core.lib.tests.test_request_utils.CustomError1'
+        expected_message = 'Test failure'
         mock_logger.info.assert_called_once_with(
-            'Expected error %s seen for path %s',
-            expected_class_with_message,
+            'Expected error %s: %s: seen for path %s',
+            expected_class,
+            expected_message,
             expected_request_path,
             exc_info=None,
         )
         mock_set_custom_attribute.assert_has_calls(
             [
                 call('checked_error_expected_from', 'drf'),
-                call('error_expected', expected_class_with_message),
-                call('error_ignored', True)
+                call('error_expected', True),
+                call('error_ignored_class', expected_class),
+                call('error_ignored_message', expected_message),
             ],
             any_order=True
         )
-- 
GitLab