Make compatible with BeautifulSoup4

This commit is contained in:
Moritz Lell 2019-10-30 21:27:46 +01:00
commit a8fd6a123c

View file

@ -1,6 +1,11 @@
from __future__ import print_function from __future__ import print_function
import sys, os, markdown, re import sys, os, markdown, re
# Prefer the legacy BeautifulSoup 3 package when it is installed and fall
# back to BeautifulSoup 4 (bs4) otherwise.  `bsver` records which major
# version was imported so later code can pick matching constructor arguments.
try:
    from BeautifulSoup import BeautifulSoup
    bsver = 3
except ImportError:
    # Catch ImportError rather than ModuleNotFoundError: the latter only
    # exists on Python >= 3.6 (where it is a subclass of ImportError), so
    # naming it here would raise NameError on older interpreters.
    from bs4 import BeautifulSoup
    bsver = 4
def _split_lines(s): def _split_lines(s):
return re.findall(r'([^\n]*\n?)', s) return re.findall(r'([^\n]*\n?)', s)
@ -180,7 +185,10 @@ def do_definition(tag):
def do_list(tag): def do_list(tag):
for i in tag: for i in tag:
name = getattr(i, 'name', '').lower() name = getattr(i, 'name', '')
# With BeautifulSoup4 a child node's 'name' attribute can exist but be None
# (instead of a string), so check for None before calling .lower().
name = name.lower() if name is not None else ''
if not name and not str(i).strip(): if not name and not str(i).strip():
pass pass
elif name != 'li': elif name != 'li':
@ -195,7 +203,11 @@ def do_list(tag):
def do(tag): def do(tag):
name = getattr(tag, 'name', '').lower() name = getattr(tag, 'name', None)
# With BeautifulSoup4 the node's 'name' attribute can exist but be None
# (instead of a string), so check for None before calling .lower().
name = name.lower() if name is not None else ''
if not name: if not name:
text(tag) text(tag)
elif name == 'h1': elif name == 'h1':
@ -274,7 +286,12 @@ if AUTHOR:
html = markdown.markdown(inp) html = markdown.markdown(inp)
open(htmlfile, 'w').write(html) open(htmlfile, 'w').write(html)
if(bsver == 3):
soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES) soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
elif(bsver == 4):
soup = BeautifulSoup(html, features = "html.parser")
else: assert 0
macro('.TH', PROD.upper(), SECTION, DATE, VENDOR, GROUPNAME) macro('.TH', PROD.upper(), SECTION, DATE, VENDOR, GROUPNAME)
macro('.ad', 'l') # left justified macro('.ad', 'l') # left justified