Commit 67914819 authored by gijs

Various tools

parent 345254ec
# Surround every standalone occurrence of each keyword with ++ markers.
# `keywords` (iterable of strings) and `md` (the text being processed) are
# expected to be defined by the surrounding code.
#
# Group 1 keeps whatever preceded the keyword (start of text or one whitespace
# character) and group 2 is the keyword itself; Python spells backreferences
# \1, \2 (not $1).
replace_patt = r'\1++\2++'
for k in keywords:
    # re.escape() keeps regex metacharacters inside a keyword literal; the
    # lookahead requires whitespace or end of text after it without consuming it.
    patt = r'(^|\s)({0})(?=\s|$)'.format(re.escape(k))
    # re.sub() returns the new string; re.subn() would return a (string, count)
    # tuple and break the next iteration.
    md = re.sub(patt, replace_patt, md)
\+\+(.[^\+]+)\+\+
<span class="keyword"></span>
#! /bin/bash
# Print an <article data-src="..."></article> line for every .html file found
# inside the book* entries of the given source folder.
#
# Usage: <script> SOURCEFOLDER
# One trailing slash on SOURCEFOLDER is stripped.
sourcefolder=${1%/}

# Expansions are quoted so paths containing whitespace stay in one piece.
for book in "${sourcefolder}"/book*; do
    # An unmatched glob is passed through as the literal pattern; skip it.
    [ -e "${book}" ] || continue
    echo "Reading ${book}"
    for htmlfile in "${book}"/*.html; do
        [ -e "${htmlfile}" ] || continue
        echo "<article data-src=\"${htmlfile}\"></article>"
    done
done

echo "Applying extra line breaks"
\ No newline at end of file
#! /bin/bash
# Hand a single HTML file to OSPKitPDF.
#
# Usage: <script> PATH/TO/FILE.html
# The first OSPKitPDF argument is the file's URL on the html2print host; the
# second is the output path: a "pdfs" directory next to the HTML file, with
# the same file name and the .html suffix replaced by .pdf.
html_path=$1
html_name=$(basename "${html_path}")

OSPKitPDF \
    "http://html2print/${html_path}" \
    "$(dirname "${html_path}")/pdfs/${html_name%.html}.pdf"
\ No newline at end of file
#! /bin/bash
# Hand every .html file of a book folder, except template.html, to OSPKitPDF.
#
# Usage: <script> BOOKFOLDER
# One trailing slash on BOOKFOLDER is stripped. Each file is addressed through
# the html2print host and its output path is BOOKFOLDER/pdfs/<name>.pdf.
bookfolder=${1%/}

# The glob prefix is quoted so folders containing whitespace stay in one piece.
for htmlfile in "${bookfolder}"/*.html; do
    # An unmatched glob is passed through as the literal pattern; skip it.
    [ -e "${htmlfile}" ] || continue
    basefilename=$(basename "${htmlfile%.html}")
    if [ "${basefilename}" != "template" ]; then
        OSPKitPDF "http://html2print/${htmlfile}" "${bookfolder}/pdfs/${basefilename}.pdf"
    fi
done
\ No newline at end of file
#! /bin/bash
# Write a "-downsampled.pdf" copy of every PDF in BOOKFOLDER/pdfs using
# Ghostscript's pdfwrite device, with colour, gray and mono image resolution
# set to 150 and all three downsample thresholds set to 1.0.
#
# Usage: <script> BOOKFOLDER
# One trailing slash on BOOKFOLDER is stripped.
bookfolder=${1%/}

# Remove the results of earlier runs first, so the loop below does not pick
# them up as input. -f keeps rm quiet when there is nothing to remove.
rm -f -- "${bookfolder}"/pdfs/*-downsampled.pdf

for pdffile in "${bookfolder}"/pdfs/*.pdf; do
    # An unmatched glob is passed through as the literal pattern; skip it.
    [ -e "${pdffile}" ] || continue
    gs \
        -o "${pdffile%.pdf}-downsampled.pdf" \
        -sDEVICE=pdfwrite \
        -dDownsampleColorImages=true \
        -dDownsampleGrayImages=true \
        -dDownsampleMonoImages=true \
        -dColorImageResolution=150 \
        -dGrayImageResolution=150 \
        -dMonoImageResolution=150 \
        -dColorImageDownsampleThreshold=1.0 \
        -dGrayImageDownsampleThreshold=1.0 \
        -dMonoImageDownsampleThreshold=1.0 \
        "${pdffile}"
done
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import glob
import codecs
import os.path
import argparse
# Command-line interface: one positional argument, the folder containing the
# HTML files. The text is passed as `description`; the first positional
# parameter of ArgumentParser is `prog`, which would put the sentence where the
# program name belongs in the usage line.
parser = argparse.ArgumentParser(
    description="Wrap last sentence of paragraphs. To deal with orphans")
parser.add_argument('folder', help="Stories folder, with HTML files")
args = parser.parse_args()
# Sentence punctuation plus following whitespace, unless only whitespace
# remains after it. The lookahead includes \s* so that backtracking cannot
# turn the final punctuation mark of a fragment that ends in whitespace
# (e.g. ".\n") into a sentence boundary.
_INNER_SENTENCE_END = re.compile(r'[.?!:]\s*(?!\s*$)')

# A complete element: an opening tag followed later by its matching closing tag.
_COMPLETE_ELEMENT = re.compile(r'.*<(\w+)(?:[^>])*>.+</\1>.*')

# The end of a tag plus any whitespace that follows it.
_TAG_END = re.compile(r'>\s*')


def wrap_last_sentence(m):
    """Wrap the last sentence of a paragraph tail in a ``last-line`` span.

    Intended as a replacement callback for ``re.sub``.

    Args:
        m: a match object with a named group ``content`` holding the text that
            ends the paragraph.

    Returns:
        The content with its final part enclosed in
        ``<span class="last-line">...</span>``.
    """
    content = m.group('content')

    boundaries = [hit.end() for hit in _INNER_SENTENCE_END.finditer(content)]
    if boundaries:
        # Split right after the last sentence boundary inside the content.
        splitindex = boundaries[-1]
    else:
        # No boundary found: split after the first space within the last 80
        # characters (find() returns -1 when there is none, giving index 0).
        splitindex = content.find(u' ', -80) + 1

    tail = content[splitindex:]
    if '>' in tail and not _COMPLETE_ELEMENT.match(tail):
        # The tail contains a tag end but no complete element, so the split
        # would separate a tag from its partner. Move the split past the last
        # tag end in the tail instead.
        splitindex += [hit.end() for hit in _TAG_END.finditer(tail)][-1]

    return u'{0}<span class="last-line">{1}</span>'.format(
        content[:splitindex], content[splitindex:])
# Rewrite every .html file directly inside the given folder, passing the end
# of each sufficiently long paragraph through wrap_last_sentence.
for htmlpath in glob.glob("{0}/*.html".format(os.path.normpath(args.folder))):
    # Parenthesised single-argument print gives the same output whether it is
    # parsed as the Python 2 statement or the Python 3 function.
    print("\t{0}".format(htmlpath))

    with codecs.open(htmlpath, mode='r', encoding='utf-8') as htmlfile:
        htmlstring = htmlfile.read()

    # A paragraph end is: exactly 170 characters (no newlines), one of . ? ! )
    # and optional whitespace, directly followed by </p>. The lookahead leaves
    # the closing tag itself untouched.
    wrapped = re.sub(
        u'(?P<content>.{170}[.?!)]\\s*)(?=</p>)',
        wrap_last_sentence,
        htmlstring)

    # Only touch the file when the substitution changed something.
    if wrapped != htmlstring:
        with codecs.open(htmlpath, mode='w', encoding='utf-8') as htmlfile:
            htmlfile.write(wrapped)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment