shithub: rc

Info • Files • Log • Branches
ref: 1fd6a3738a4775ee3a326a31a750778c1d0ddda1
dir: /epub2html/
#!/bin/rc
# epub2html - convert epub to html
# usage: epub2html file.epub
# bugs:  only one epub at a time

# set some defaults
# rfork e gives the script its own copy of the environment group, so the
# variables assigned from here on stay private to this run.
rfork e
# directory the script was started in; later used to resolve a relative
# epub argument and to place the generated index file.
cwd=`{pwd}
# usage: print the one-line synopsis on standard error and terminate
# the script with exit status 'usage'.
fn usage{
    echo >[1=2] Usage: epub2html file.epub
    exit usage
}
# exactly one argument is accepted
if(! ~ $#* 1) usage
# file: absolute path of the epub (a relative argument is resolved against
# the starting directory, because the script changes directory later)
file=$1
if(! ~ $file /*) file=`{cleanname $cwd/$1}
# Reject the argument when it is not an existing file OR when its name does
# not end in .epub (any letter case).  '!' binds tighter than '&&' and '||'
# in rc, so each test is negated on its own; joining them with '&&' would
# only reject arguments that fail both checks.
if(! test -f $file || ! ~ $1 *.[Ee][Pp][Uu][Bb]) usage
# name: base name without the trailing .epub; the '$' anchor keeps an
# earlier ".epub" inside the name (e.g. my.epub.notes.epub) intact
name=`{basename $1 | sed 's/\.[Ee][Pp][Uu][Bb]$//'}
# dir: directory the archive is unpacked into
dir=$name^_files

# determine directory name of toc file
# usage: ops dir
# Prints the directory that holds toc.ncx: the first subdirectory of dir
# whose name matches ^o.*ps (case-insensitive, e.g. OEBPS or OPS) and which
# contains a toc.ncx entry; otherwise dir itself.
# Exactly one path is printed, even when several entries of dir match the
# name pattern or a match is a plain file rather than a directory.
# rc has no function-local variables: found, d and toc are ordinary
# variables of whatever shell runs this function.
fn ops{
    found=()
    for(d in `{ls -p $1 | grep -i '^o.*ps'}){
        # only inspect further candidates until one has been accepted
        if(~ $#found 0 && test -d $1/$d){
            toc=`{ls -p $1/$d | grep -i '^toc\.ncx$'}
            if(! ~ $#toc 0) found=$1/$d
        }
    }
    if(~ $#found 0) echo $1
    if not echo $found
}

# extract epub and chapter information
# Unpack into $dir.  Stop when the directory cannot be created or entered:
# carrying on would unpack the archive, and write chaps and links, in
# whatever directory the script is currently in.
mkdir -p $dir && cd $dir || {
    echo epub2html: cannot use directory $dir >[1=2]
    exit 'no directory'
}
# everything unzip prints, diagnostics included, is discarded
unzip -af $file >/dev/null >[2=1]
# change to the directory reported by ops as holding toc.ncx
ops=`{ops $cwd/$dir} && cd $ops
# chaps: one line per <text>...</text> found inside the navPoint elements
sed -n '/<navPoint/,/<\/navPoint/p' [Tt][Oo][Cc].[Nn][Cc][Xx] |
  sed -n 's/.*<text>(.*)<\/text>.*/\1/p' > chaps
# links: the src="..." targets from the same elements, with %20 turned back
# into a space.  [^"]* ends the capture at the closing quote of src, so
# attributes that follow it on the same line are not swallowed.
sed -n '/<navPoint/,/<\/navPoint/p' [Tt][Oo][Cc].[Nn][Cc][Xx] |
  sed -n 's/.*src="([^"]*)".*/\1/p' | sed 's/%20/ /g' > links

# generate html index
# The index is written to <name>.html in the starting directory: a fixed
# header, one anchor per line of the links file, then a fixed footer.
index=$cwd/$name.html

# header (creates or truncates the index file)
echo '<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8">
        <title>Contents</title>
    </head>
    <body>
        <h1>Contents:</h1>' > $index

# entry n pairs line n of links (the target) with line n of chaps (the title)
count=`{cat links | wc -l}
for(n in `{seq $count}){
    href=`{sed -n $n^p links}
    title=`{sed -n $n^p chaps}
    echo '        <a href="'$ops^/^$"href'">'$"title'</a><br>' >> $index
}

# footer
echo '    </body>
</html>' >> $index