import sys
import re
def cleancss(text):
    """Return *text* (CSS source) re-flowed to one declaration per line.

    The passes run in this order, each on the output of the previous one:

    1. every run of whitespace is collapsed to a single space;
    2. each ``,`` or ``:`` loses its surrounding whitespace and is followed
       by exactly one space;
    3. each ``;`` is followed by a newline and a one-space indent;
    4. each ``{`` is preceded by a space and followed by a newline and a
       one-space indent;
    5. each ``}`` is placed on its own line and followed by a blank line.

    NOTE: pass 2 is purely textual, so it also puts a space after colons
    that are not property separators (``a:hover`` becomes ``a: hover``,
    ``http://`` becomes ``http: //``).
    """
    text = re.sub(r'\s+', ' ', text)
    # The separator is captured by name so the replacement can re-emit
    # whichever of "," or ":" was matched.
    text = re.sub(r'\s*(?P<sep>,|:)\s*', r'\g<sep> ', text)
    text = re.sub(r'\s*;\s*', ';\n ', text)
    text = re.sub(r'\s*\{\s*', ' {\n ', text)
    text = re.sub(r'\s*\}\s*', '\n}\n\n', text)
    return text
# cleanhtml(text): re-indent an HTML document, one tag per line, and return
# the result as a string.
#
# NOTE(review): this copy of the function is damaged. Indentation has been
# lost, and anything that looked like markup inside string literals (tag
# patterns, named-group names, placeholder tags) appears to have been
# stripped, leaving several literals truncated or unterminated. The code
# below is left untouched; the comments describe only what is still
# visible and mark guesses as such. Restore the literals from a pristine
# copy before relying on this function.
def cleanhtml(text):
# The whole document is lower-cased, including text content and attribute
# values, not only tag names.
text = text.lower()
# Collect every match of a DOTALL pattern into `scripts`, then substitute
# the matches in the text.
# NOTE(review): the pattern and the replacement literal are truncated here;
# presumably the pattern matched whole <script> elements and the replacement
# was a placeholder that the final loop swaps back -- confirm.
r = re.compile('\', re.DOTALL)
scripts = r.findall(text)
text = r.sub('', text)
# Same collect-and-substitute step, stored in `styles`.
# NOTE(review): presumably for <style> elements -- confirm.
r = re.compile('\', re.DOTALL)
styles = r.findall(text)
text = r.sub('', text)
# Rewrite input/meta/link/hr/br/img/param tags so they end in " />".
# NOTE(review): the named groups and the tail of the pattern are truncated
# in this copy.
text = re.compile(
'<(?P(input|meta|link|hr|br|img|param))(?P[^\>]*)\s*(?')\
.sub('<\g\g />', text)
# Flatten the document onto one line, then break it so that every '<'
# starts a line and every '>' ends one; finally drop the whitespace around
# each line break.
text = text.replace('\n', ' ')
text = text.replace('>', '>\n')
text = text.replace('<', '\n<')
text = re.compile('\s*\n\s*').sub('\n', text)
lines = text.split('\n')
# `indent` is the current nesting depth; `newlines` collects the
# re-indented lines.
(indent, newlines) = (0, [])
for line in lines:
# Decrease the depth before emitting the line when its first two characters
# match the compared literal.
# NOTE(review): the literal is empty in this copy; presumably it was the
# closing-tag prefix -- confirm.
if line[:2] == '': indent = indent - 1
newlines.append(indent * ' ' + line)
# Increase the depth after a line that ends in '>' unless it ends in '/>'
# or '->' (presumably self-closing tags and comment ends), and unless it
# starts with the same (truncated) literal tested above.
if not line[:2] == '' and line[-1:] == '>' and \
not line[-2:] in ['/>', '->']: indent = indent + 1
text = '\n'.join(newlines)
# NOTE(review): the following substitutions (through the one whose
# replacement ends in '/>') are too damaged to document precisely. What
# remains shows patterns matching '>' followed by whitespace, in four cases
# a lazily matched run of word characters, whitespace, parentheses and
# slashes, and replacements beginning with '>'. Presumably they pull short
# element content back onto the line of its opening tag -- confirm against
# the original.
text = re.compile(
'\( .+)?)\>\s+\
').sub('>
', text)
text = re.compile('\( .+)?)\>\s+(?P[\w\s\(\)\/]+?)\s+\').sub('>\g', text)
text = re.compile('\( .+)?)\>\s+(?P[\w\s\(\)\/]+?)\s+\').sub('>\g', text)
text = re.compile('\( .+)?)\>\s+(?P[\w\s\(\)\/]+?)\s+\').sub('>\g', text)
text = re.compile('\( .+)?)\>\s+(?P[\w\s\(\)\/]+?)\s+\').sub('>\g', text)
text = re.compile('\s+\
.*?)\/\>').sub('
/>', text)
# Matches whitespace between a '>' and a following '.', ',', ':' or ';'.
# NOTE(review): the group names and replacement are truncated, so what is
# re-emitted cannot be determined from this copy.
text = re.compile('\>(?P\s+)(?P[\.\,\:\;])').sub('>\g\g', text)
# Collapse blank (or whitespace-only) lines.
text = re.compile('\n\s*\n').sub('\n', text)
# Put the collected script and style blocks back, one replacement per
# collected item and in collection order; style blocks are passed through
# cleancss() first.
# NOTE(review): the search literal is empty in this copy; presumably it was
# the placeholder inserted by the substitutions near the top -- confirm.
for script in scripts:
text = text.replace('', script, 1)
for style in styles:
text = text.replace('', cleancss(style), 1)
return text
def read_file(filename):
    """Return the full contents of the file at *filename*.

    The file is opened in text mode for reading and is closed before the
    function returns, including when the read raises. Errors from
    ``open`` or ``read`` propagate to the caller.
    """
    with open(filename, 'r') as f:
        return f.read()
# Script entry: pretty-print the file named by the first command-line
# argument, choosing the cleaner from the file extension. Files with any
# other extension produce no output.
file = sys.argv[1]
# print(...) with a single argument behaves the same under Python 2 and 3.
if file.endswith('.css'):
    print(cleancss(read_file(file)))
elif file.endswith('.html'):
    print(cleanhtml(read_file(file)))