ref: 1fd6a3738a4775ee3a326a31a750778c1d0ddda1
dir: /epub2html/
#!/bin/rc
# epub2html - convert epub to html
# usage: epub2html file.epub
# bugs: only one epub at a time
# set some defaults
# rfork e puts the script in its own copy of the environment group, so the
# variables assigned from here on do not leak back into the calling shell
rfork e
# remember the starting directory: it is used below to make the epub path
# absolute and to place the generated index next to where we were invoked
cwd=`{pwd}
# Print the one-line synopsis on standard error and terminate the
# script with exit status 'usage'.
fn usage{
	echo 'Usage: epub2html file.epub' >[1=2]
	exit 'usage'
}
# Validate the command line and derive the output names.
# Exactly one argument is accepted; it must name an existing file whose
# name ends in .epub (any letter case).
if(! ~ $#* 1) usage
file=$1
# make the path absolute: the script changes directory before unzipping
if(! ~ $file /*) file=`{cleanname $cwd/$1}
# reject the argument when EITHER check fails; joining the two negated
# tests with && would only reject when both fail at once
if(! test -f $file || ! ~ $1 *.[Ee][Pp][Uu][Bb]) usage
# strip only the trailing extension ($ anchor) to get the base name
name=`{basename $1 | sed 's/\.[Ee][Pp][Uu][Bb]$//'}
# directory the epub is extracted into
dir=$name^_files
# Determine the directory name of the toc file and print it.
# $1 is the extraction root.  When the root lists an entry matching
# ^o.*ps (case-insensitive) and that entry in turn lists something
# matching toc.ncx, print $1/entry; in every other case print $1.
fn ops{
	toc=()
	ops=`{ls -p $1 | grep -i '^o.*ps'}
	# only look for the toc when a candidate subdirectory was found
	if(! ~ $#ops 0)
		toc=`{ls -p $1/$ops | grep -i 'toc.ncx'}
	if(~ $#toc 0) echo $1
	if not echo $1/$ops
}
# extract epub and chapter information
# Everything below writes into the current directory, so stop if the
# extraction directory cannot be created or entered.
mkdir -p $dir && cd $dir || {
	echo epub2html: cannot create or enter $dir >[1=2]
	exit 'mkdir'
}
# unzip output, diagnostics included, is discarded
unzip -af $file >/dev/null >[2=1]
# move to the directory holding the toc file; give up if that fails,
# since chaps and links would otherwise be written in the wrong place
ops=`{ops $cwd/$dir}
cd $ops || {
	echo epub2html: cannot enter toc directory >[1=2]
	exit 'cd'
}
# chaps: the <text> label found inside each navPoint, one per line
sed -n '/<navPoint/,/<\/navPoint/p' [Tt][Oo][Cc].[Nn][Cc][Xx] |
	sed -n 's/.*<text>(.*)<\/text>.*/\1/p' > chaps
# links: the src attribute found inside each navPoint, with %20 turned
# back into a space.  [^"]* stops at the closing quote so attributes that
# follow src on the same line are not swallowed into the link.
sed -n '/<navPoint/,/<\/navPoint/p' [Tt][Oo][Cc].[Nn][Cc][Xx] |
	sed -n 's/.*src="([^"]*)".*/\1/p' | sed 's/%20/ /g' > links
# generate html index
# The index is written next to where the script was started, as
# $name.html: a fixed header, one anchor per toc entry, a fixed footer.
index=$cwd/$name.html
cat <<eof > $index
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Contents</title>
</head>
<body>
<h1>Contents:</h1>
eof
# line N of links pairs with line N of chaps
count=`{wc -l < links}
for(n in `{seq $count}){
	chap=`{sed -n $n^p chaps}
	link=`{sed -n $n^p links}
	# $"var joins the words of a title or path back into one string
	echo ' <a href="'$ops^/^$"link'">'$"chap'</a><br>' >> $index
}
cat <<eof >> $index
</body>
</html>
eof