From 5cb145054d13281cdce74ae98c615626eabd5f9d Mon Sep 17 00:00:00 2001
From: "Dave St.Germain" <dstgermain@edx.org>
Date: Fri, 21 Mar 2014 15:54:22 -0400
Subject: [PATCH] Fixes BLD-948 by escaping messages from xqueue, preventing
 malformed html from getting onto the page. The client will then unescape the
 html and display it.

---
 common/lib/capa/capa/inputtypes.py            | 12 ++++-
 common/lib/capa/capa/tests/test_inputtypes.py | 50 +++++++++++++++++++
 .../xmodule/tests/test_combined_open_ended.py | 10 ++--
 requirements/edx/base.txt                     |  4 +-
 4 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/common/lib/capa/capa/inputtypes.py b/common/lib/capa/capa/inputtypes.py
index bc7674b3c07..a9ce23c2e5b 100644
--- a/common/lib/capa/capa/inputtypes.py
+++ b/common/lib/capa/capa/inputtypes.py
@@ -47,6 +47,7 @@ import shlex  # for splitting quoted strings
 import sys
 import pyparsing
 import html5lib
+import bleach
 
 from .registry import TagRegistry
 from chem import chemcalc
@@ -746,7 +747,7 @@ class CodeInput(InputTypeBase):
         if self.status == 'incomplete':
             self.status = 'queued'
             self.queue_len = self.msg
-            self.msg = self.submitted_msg
+            self.msg = bleach.clean(self.submitted_msg)
 
     def setup(self):
         """ setup this input type """
@@ -802,7 +803,14 @@ class MatlabInput(CodeInput):
         # this is only set if we don't have a graded response
         # the graded response takes precedence
         if 'queue_msg' in self.input_state and self.status in ['queued', 'incomplete', 'unsubmitted']:
-            self.queue_msg = self.input_state['queue_msg']
+            attributes = bleach.ALLOWED_ATTRIBUTES.copy()
+            attributes.update({'*': ['class', 'style', 'id'], 'audio': ['controls', 'autobuffer', 'autoplay', 'src']})
+            self.queue_msg = bleach.clean(self.input_state['queue_msg'],
+                    tags=bleach.ALLOWED_TAGS + ['div', 'p', 'audio', 'pre'],
+                    styles=['white-space'],
+                    attributes=attributes
+                    )
+
         if 'queuestate' in self.input_state and self.input_state['queuestate'] == 'queued':
             self.status = 'queued'
             self.queue_len = 1
diff --git a/common/lib/capa/capa/tests/test_inputtypes.py b/common/lib/capa/capa/tests/test_inputtypes.py
index 679ea90aa90..890dbb2ecc3 100644
--- a/common/lib/capa/capa/tests/test_inputtypes.py
+++ b/common/lib/capa/capa/tests/test_inputtypes.py
@@ -646,6 +646,56 @@ class MatlabTest(unittest.TestCase):
 
         self.the_input.capa_system.render_template = old_render_template
 
+    def test_malformed_queue_msg(self):
+        # an actual malformed response
+        queue_msg = textwrap.dedent("""
+    <div class='matlabResponse'><div style='white-space:pre' class='commandWindowOutput'> <strong>if</strong> Conditionally execute statements.
+    The general form of the <strong>if</strong> statement is
+ 
+       <strong>if</strong> expression
+         statements
+       ELSEIF expression
+         statements
+       ELSE
+         statements
+       END
+ 
+    The statements are executed if the real part of the expression 
+    has all non-zero elements. The ELSE and ELSEIF parts are optional.
+    Zero or more ELSEIF parts can be used as well as nested <strong>if</strong>'s.
+    The expression is usually of the form expr rop expr where 
+    rop is ==, <, >, <=, >=, or ~=.
+ 
+    Example
+       if I == J
+         A(I,J) = 2;
+       elseif abs(I-J) == 1
+         A(I,J) = -1;
+       else
+         A(I,J) = 0;
+       end
+ 
+    See also <a href="matlab:help relop">relop</a>, <a href="matlab:help else">else</a>, <a href="matlab:help elseif">elseif</a>, <a href="matlab:help end">end</a>, <a href="matlab:help for">for</a>, <a href="matlab:help while">while</a>, <a href="matlab:help switch">switch</a>.
+
+    Reference page in Help browser
+       <a href="matlab:doc if">doc if</a>
+
+    </div><ul></ul></div>
+        """)
+
+        state = {'value': 'print "good evening"',
+                 'status': 'incomplete',
+                 'input_state': {'queue_msg': queue_msg},
+                 'feedback': {'message': '3'}, }
+        elt = etree.fromstring(self.xml)
+
+        the_input = self.input_class(test_capa_system(), elt, state)
+        context = the_input._get_render_context()  # pylint: disable=W0212
+        self.maxDiff = None
+        expected = u'\n<div class="matlabResponse"><div class="commandWindowOutput" style="white-space: pre;"> <strong>if</strong> Conditionally execute statements.\nThe general form of the <strong>if</strong> statement is\n\n   <strong>if</strong> expression\n     statements\n   ELSEIF expression\n     statements\n   ELSE\n     statements\n   END\n\nThe statements are executed if the real part of the expression \nhas all non-zero elements. The ELSE and ELSEIF parts are optional.\nZero or more ELSEIF parts can be used as well as nested <strong>if</strong>\'s.\nThe expression is usually of the form expr rop expr where \nrop is ==, &lt;, &gt;, &lt;=, &gt;=, or ~=.\n\nExample\n   if I == J\n     A(I,J) = 2;\n   elseif abs(I-J) == 1\n     A(I,J) = -1;\n   else\n     A(I,J) = 0;\n   end\n\nSee also <a>relop</a>, <a>else</a>, <a>elseif</a>, <a>end</a>, <a>for</a>, <a>while</a>, <a>switch</a>.\n\nReference page in Help browser\n   <a>doc if</a>\n\n</div><ul></ul></div>\n'
+
+        self.assertEqual(context['queue_msg'], expected)
+
 
 class SchematicTest(unittest.TestCase):
     '''
diff --git a/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py b/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
index f08af88a877..3eb5c914fe6 100644
--- a/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
+++ b/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
@@ -1288,7 +1288,7 @@ class OpenEndedModuleXmlImageUploadTest(unittest.TestCase, DummyModulestore):
     file_text = "Hello, this is my amazing file."
     file_name = "Student file 1"
     answer_link = "http://www.edx.org"
-    autolink_tag = "<a href="
+    autolink_tag = '<a target="_blank" href='
 
     def get_module_system(self, descriptor):
         test_system = get_test_system()
@@ -1365,11 +1365,11 @@ class OpenEndedModuleUtilTest(unittest.TestCase):
     script_dirty = u'<script>alert("xss!")</script>'
     script_clean = u'alert("xss!")'
     img_dirty = u'<img alt="cats" height="200" onclick="eval()" src="http://example.com/lolcats.jpg" width="200">'
-    img_clean = u'<img alt="cats" height="200" src="http://example.com/lolcats.jpg" width="200">'
+    img_clean = u'<img width="200" alt="cats" height="200" src="http://example.com/lolcats.jpg">'
     embed_dirty = u'<embed height="200" id="cats" onhover="eval()" src="http://example.com/lolcats.swf" width="200"/>'
-    embed_clean = u'<embed height="200" id="cats" src="http://example.com/lolcats.swf" width="200">'
+    embed_clean = u'<embed width="200" height="200" id="cats" src="http://example.com/lolcats.swf">'
     iframe_dirty = u'<iframe class="cats" height="200" onerror="eval()" src="http://example.com/lolcats" width="200"/>'
-    iframe_clean = u'<iframe class="cats" height="200" src="http://example.com/lolcats" width="200"></iframe>'
+    iframe_clean = u'<iframe height="200" class="cats" width="200" src="http://example.com/lolcats"></iframe>'
 
     text = u'I am a \u201c\xfcber student\u201d'
     text_lessthan_noencd = u'This used to be broken < by the other parser. 3>5'
@@ -1378,7 +1378,7 @@ class OpenEndedModuleUtilTest(unittest.TestCase):
     text_brs = u"St\xfcdent submission:<br/>I like lamp."
 
     link_text = u'I love going to www.lolcatz.com'
-    link_atag = u'I love going to <a href="http://www.lolcatz.com" target="_blank">www.lolcatz.com</a>'
+    link_atag = u'I love going to <a target="_blank" href="http://www.lolcatz.com">www.lolcatz.com</a>'
 
     def test_script(self):
         """
diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt
index d649bd6658a..65e1e805448 100644
--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -8,8 +8,8 @@
 
 beautifulsoup4==4.1.3
 beautifulsoup==3.2.1
-bleach==1.2.2
-html5lib==0.95
+bleach==1.4
+html5lib==0.999
 boto==2.13.3
 celery==3.0.19
 dealer==0.2.3
-- 
GitLab