import json, re
import uuid
from urllib.parse import quote, unquote
import yaml
import os, urllib.request
import time
from flask import Flask, config, render_template, request, redirect, make_response
from paddleocr import PaddleOCR, draw_ocr
# Module-level OCR engine, created once at import time and reused by every
# request handled in this process: angle classification on, English model.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
# Flask application object that the routes below are registered on.
app = Flask(__name__)
@app.route('/ocr', methods=['GET'])
def do_ocr():
    """Download the image named by the ``url`` query parameter and OCR it.

    Query parameters:
        url: URL-encoded http(s) address of the image to recognise.

    Returns:
        The recognised text fragments, upper-cased and joined with commas.
        An empty string when ``url`` is missing, and ``("", 400)`` when the
        URL is not an http(s) URL.

    Raises:
        Any error raised by the download or by the OCR engine propagates to
        Flask (HTTP 500); the temporary file is removed in every case.
    """
    # No ``request.method == 'GET'`` guard: the route only accepts GET, and
    # Flask also dispatches HEAD requests to GET rules. With the guard, a
    # HEAD request fell through and returned None, which Flask turns into a
    # 500 error.
    raw_url = request.args.get('url')
    if raw_url is None:
        return ""
    url = unquote(raw_url)

    # The URL comes from an untrusted client. urlretrieve() would also accept
    # schemes such as file:// or ftp://, so only plain web URLs are allowed.
    if not url.lower().startswith(('http://', 'https://')):
        return "", 400

    tmp_path = f"/dev/shm/{uuid.uuid4()}"
    try:
        urllib.request.urlretrieve(url, tmp_path)
        result = ocr.ocr(tmp_path, cls=True)
        # Each entry is indexed as entry[1][0] for its text, exactly as the
        # original loop did. A None result (nothing recognised) is treated as
        # "no text" instead of crashing.
        texts = [entry[1][0].upper() for entry in result or []]
    finally:
        # /dev/shm is normally a RAM-backed tmpfs on Linux, so a file left
        # behind after a failed download or OCR run keeps consuming memory.
        # Always clean up, whatever happened above.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            # The download failed before the file was created.
            pass

    # strip(",") is kept so the output matches the original exactly, even
    # when a recognised fragment itself starts or ends with a comma.
    return ",".join(texts).strip(",")
# Start Flask's built-in server only when this file is executed directly;
# importing the module (e.g. from a WSGI server) skips this.
if __name__ == "__main__":
    app.run()
ocrapi.py 是从别处复制来的 gevent 猴子补丁入口(不打补丁的话,并发时好像会有问题):
# Entry module: apply gevent's monkey patching, then expose the Flask app.
# NOTE(review): `pywsgi` is imported but not used in the lines shown here.
from gevent import monkey, pywsgi
# Patch before `app` is imported below; presumably so that everything the
# application imports already sees the patched standard library.
monkey.patch_all()
# Re-export the Flask application object defined in app.py.
from app import app
使用 supervisor 启动,配置如下:
; supervisor program definition for the OCR HTTP service.
[program:ocrapi]
; gunicorn with 6 gevent workers, up to 50 connections per worker, listening
; on all interfaces at port 8080 and serving the `app` object from ocrapi.py.
; NOTE(review): without --preload each worker presumably loads its own copy
; of the OCR model, so baseline memory scales with the worker count - confirm.
; NOTE(review): gunicorn's --max-requests / --max-requests-jitter options can
; recycle workers periodically to bound memory growth - consider adding them.
command=gunicorn --workers 6 --worker-class=gevent --worker-connections=50 -b 0.0.0.0:8080 ocrapi:app
; Working directory the command is started from.
directory=/opt/ocrapi/
; NOTE(review): the service runs as root; consider a dedicated unprivileged user.
user=root
autorestart=true
; stderr is merged into stdout, which is written to the log file below.
redirect_stderr=true
stdout_logfile=/var/log/ocrapi.log
; NOTE(review): `loglevel` looks like a [supervisord]-section option rather
; than a [program:x] one - verify that it has any effect here.
loglevel=info
服务启动时内存占用在 2GB 左右,随后持续增长。
另外想问一下:如果只用于识别包装盒上的产品序列号(由大写英文字母和数字组成,位数固定;手机拍照,白底黑字和黑底白字都有可能),有没有效果更好的方案?