shithub: rc

Download patch

ref: eef67b503efb2ec9fbec704270207e720b15ce18
parent: e632ea03e5154e539cd4c252c40e365412b10472
author: sl <sl@x1yg3>
date: Sun Jul 6 18:15:18 EDT 2025

add epub2html

--- a/INDEX
+++ b/INDEX
@@ -17,6 +17,7 @@
 crip - Repack .cbr as .cbz. Try to fix the file names.
 df - du -h /root | tail -1
 dsrc - delete posts on a given barf website
+epub2html - convert epub to html
 flac2alac - Convert FLAC to ALAC using ffmpeg.
 flac2mp3 - Convert FLAC to MP3 using lame.
 flacsplit - Split single FLAC into multiple files based on .cue sheet using shntool.
--- /dev/null
+++ b/epub2html
@@ -1,0 +1,60 @@
+#!/bin/rc
+# epub2html - convert epub to html
+# usage: epub2html file.epub
+# bugs:  only one epub at a time; file names containing whitespace are not handled
+
+# set some defaults
+rfork e       # new environment group: variables set below do not reach the caller
+cwd=`{pwd}    # starting directory; relative input paths and the output file are resolved against it
+fn usage{
+    echo >[1=2] 'Usage: epub2html file.epub'    # the synopsis goes to standard error
+    exit 'usage'                                # terminate the script with exit status 'usage'
+}
+if(! ~ $#* 1) usage    # exactly one argument is accepted
+file=$1
+if(! ~ $file /*) file=`{cleanname $cwd/$1}    # make the path absolute: the script changes directory before unzipping
+if(! test -f $file || ! ~ $1 *.[Ee][Pp][Uu][Bb]) usage    # reject a missing file OR a name without an .epub suffix
+name=`{basename $1 | sed 's/\.[Ee][Pp][Uu][Bb]$//'}    # strip only the trailing extension, not an earlier '.epub'
+dir=$name^_files    # extraction directory, created under the starting directory
+
+# determine directory name of toc file
+# usage: ops dir
+# prints the first entry of dir whose name matches o*ps (case-insensitive,
+# e.g. OEBPS or OPS) and which contains a toc.ncx; prints dir itself when no
+# entry qualifies.  exactly one path is printed even if several names match.
+fn ops{
+    found=()
+    for(d in `{ls -p $1 | grep -i '^o.*ps'})
+        if(~ $#found 0 && ls -p $1/$d | grep -is '^toc\.ncx$') found=$1/$d
+    if(~ $#found 0) echo $1; if not echo $found
+}
+
+# extract epub and chapter information
+mkdir -p $dir && cd $dir || exit 'cannot enter '^$dir    # stop here rather than unzip into the wrong directory
+unzip -af $file >/dev/null >[2=1]
+ops=`{ops $cwd/$dir} && cd $ops    # continue in the directory that ops reports for the toc
+cat [Tt][Oo][Cc].[Nn][Cc][Xx] | sed -n '/<navPoint/,/<\/navPoint/p' |
+  sed -n 's/.*<text>(.*)<\/text>.*/\1/p' > chaps    # one navPoint label per line
+cat [Tt][Oo][Cc].[Nn][Cc][Xx] | sed -n '/<navPoint/,/<\/navPoint/p' |
+  sed -n 's/.*src="(.*)".*/\1/p' | sed 's/%20/ /g' > links    # matching src targets, %20 decoded to spaces
+
+# generate html index: fixed header, one link per line of links/chaps, fixed footer
+page=$cwd/$name.html
+cat > $page <<eof
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta charset="utf-8">
+        <title>Contents</title>
+    </head>
+    <body>
+        <h1>Contents:</h1>
+eof
+for(n in `{seq `{wc -l < links}}){
+    href=`{sed -n $n^p links}
+    title=`{sed -n $n^p chaps}
+    echo '        <a href="'$ops^/^$"href'">'$"title'</a><br>' >> $page
+}
+cat >> $page <<eof
+    </body>
+</html>
+eof
--