Summary
安装过程
# Deploy the frooodle/s-pdf container with Docker Compose.
# Project name; also used as the directory name created under DockerSSD.
export DOCKER_PROJ=s-pdf
# Not referenced by any command below; presumably the public hostname used
# when configuring the reverse proxy -- TODO confirm.
export DOCKER_PROJ_DOMAIN="${DOCKER_PROJ}.xxx.xxx"

# The steps are chained with && so that a failed cd/mkdir can never cause
# docker-compose.yml to be written, or `docker compose down` to be run, in
# whatever directory the shell happened to be in.
cd /data_n001/data/udata/real/156/DockerSSD/ &&
  mkdir -p -- "$DOCKER_PROJ" &&
  cd -- "$DOCKER_PROJ" &&
  cat > docker-compose.yml <<'EOF' &&
services:
  spdf:
    image: frooodle/s-pdf
    container_name: s-pdf
    #ports:
    # - "8080:8080"
    volumes:
      - /opt/homebrew/share/tessdata:/usr/share/tessdata
    restart: unless-stopped
networks:
  default:
    name: npm_nginx_proxy_manager-network
    external: true
EOF
  {
    # Recreate the stack, give it a moment to start, then follow its logs.
    docker compose down
    docker compose up -d
    sleep 1 # plain seconds: the "1s" suffix is not accepted by every sleep
    docker compose logs -f
  }

# Next, in Nginx Proxy Manager (npm), add a proxy host -> http://dnsmgr-web:80
# NOTE(review): "dnsmgr-web:80" looks copied from another project's notes; the
# container defined above is named s-pdf and the commented-out port mapping
# suggests it listens on 8080 -- confirm the intended upstream.
# Then set up zspace (the "极空间" NAS) remote forwarding -> http://ip:42891
关于 OCR
# Tesseract OCR (Optical Character Recognition): install the engine, then
# print its version and the language packs it currently sees.
brew install tesseract
tesseract --version
tesseract --list-langs

# Fetch additional trained models into Homebrew's tessdata directory.
# The cd is guarded so the clones/copies never land in an unrelated directory,
# and each clone is skipped when its directory already exists so the notes can
# be re-run. --depth 1 is used because only the current files are needed, not
# the repositories' history.
# NOTE(review): tessdata_best is cloned but nothing below copies from it.
cd /opt/homebrew/share/tessdata/ && {
  [ -d tessdata_best ] ||
    git clone --depth 1 https://github.com/tesseract-ocr/tessdata_best
  [ -d tessdata ] ||
    git clone --depth 1 https://github.com/tesseract-ocr/tessdata
  # Japanese plus every Chinese (chi*) model from the standard set.
  cp -- tessdata/jpn.traineddata .
  cp -- tessdata/chi*.traineddata .
}

# PDF rendering library (based on the xpdf-3.0 code base)
# https://poppler.freedesktop.org/
brew install poppler

# Get the PDF2SearchablePDF wrapper script and run it with Japanese ("jpn")
# as the OCR language. input.pdf is presumably a placeholder for the real
# file -- adjust before running.
cd /data && {
  [ -d PDF2SearchablePDF ] ||
    git clone https://github.com/ElectricRCAircraftGuy/PDF2SearchablePDF
  cd PDF2SearchablePDF &&
    ./pdf2searchablepdf.sh input.pdf jpn
}
Raw