Home | History | Annotate | Download | only in bs4

Lines Matching refs:markup

215     def __init__(self, markup, override_encodings=None, is_html=False):
222 self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
234 """Yield a number of encodings that might work for this markup."""
249 self.markup, self.is_html)
256 self.chardet_encoding = chardet_dammit(self.markup)
289 def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
298 xml_endpos = html_endpos = len(markup)
301 html_endpos = max(2048, int(len(markup) * 0.05))
304 declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
306 declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
333 def __init__(self, markup, override_encodings=[],
340 self.detector = EncodingDetector(markup, override_encodings, is_html)
343 if isinstance(markup, unicode) or markup == '':
344 self.markup = markup
345 self.unicode_markup = unicode(markup)
350 # Use the stripped markup from this point on.
351 self.markup = self.detector.markup
355 markup = self.detector.markup
403 markup = self.markup
410 markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
415 u = self._to_unicode(markup, proposed, errors)
416 self.markup = u
423 return self.markup