今天的另外一个案例是模拟有一定编程基础(新新手)的电脑工作者,让 ChatGPT 处理一些图片数据,需求复杂一些。

具体任务是帮助识别学生缴费图片的信息,便于复核。然后 …… ChatGPT 继续使用 python, 活干得不错。

后面换用 go 语言的过程中,由于 ChatGPT 单次回复有长度限制,输出不完整,结果有瑕疵(出现了两个 main 函数),随后修复了。之后改用腾讯云服务,因为同样的原因出现了同样的问题,但是最关键的调用部分已经写得很清楚,追问得到的流程图也是对的,就不细究了。

Q1 写一个识别图片上的日期、学号和完成字样的算法








Q2 日期格式 2023-02-15 15:00:00,学号格式 15627856,完成字样格式 缴费完成,写一个 python 的本地实现


"""Recognise the payment date, student ID and completion mark in one image.

Runs OCR on ``image.jpg`` and prints each field, or a failure message for
every field that could not be found.
"""
import re

# cv2 and pytesseract are imported inside main() so that the pure
# text-parsing helper below can be used without the OCR stack installed.

IMAGE_PATH = "image.jpg"

# The completion mark is Chinese text. Tesseract's default English model
# cannot read it, so the Simplified Chinese model is loaded as well
# (requires the ``chi_sim`` traineddata to be installed).
OCR_LANG = "chi_sim+eng"

DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
# Exactly eight digits: the look-arounds stop the pattern from matching the
# first eight digits of a longer number such as an order or serial number.
STUDENT_ID_PATTERN = re.compile(r"(?<!\d)\d{8}(?!\d)")
COMPLETE_PATTERN = re.compile(r"完成")


def extract_fields(text):
    """Return ``(date, student_id, complete)`` found in OCR output.

    Each element is the first matching substring of *text*, or ``None``
    when that field does not occur.
    """
    def first_match(pattern):
        match = pattern.search(text)
        return match.group(0) if match else None

    return (
        first_match(DATE_PATTERN),
        first_match(STUDENT_ID_PATTERN),
        first_match(COMPLETE_PATTERN),
    )


def main(image_path=IMAGE_PATH):
    """OCR *image_path* and print the recognised fields."""
    import cv2
    import pytesseract

    img = cv2.imread(image_path)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising, which would otherwise crash inside cvtColor.
    if img is None:
        raise SystemExit(f"无法读取图像: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray, lang=OCR_LANG)

    date, student_id, complete = extract_fields(text)

    if date:
        print("日期: ", date)
    else:
        print("未能识别日期。")

    if student_id:
        print("学号: ", student_id)
    else:
        print("未能识别学号。")

    if complete:
        print("完成: ", complete)
    else:
        print("未能识别完成字样。")


if __name__ == "__main__":
    main()


Q3 我有一批图片处理,写一个批处理的算法,识别处理完成后,将图片名称、识别结果存储到csv文件,如果识别失败,注明是哪项


"""Batch OCR of payment screenshots.

Every file in ``input/`` is OCR'd, copied to ``output/`` and summarised as
one row of ``output.csv``. A field that could not be recognised is
replaced by a message naming that field.
"""
import csv
import os
import re

# cv2 and pytesseract are imported inside main() so that the pure
# text-parsing helper below can be used without the OCR stack installed.

INPUT_FOLDER = "input"
OUTPUT_FOLDER = "output"
CSV_PATH = "output.csv"
CSV_HEADER = ["文件名", "日期", "学号", "完成"]

# The completion mark is Chinese text. Tesseract's default English model
# cannot read it, so the Simplified Chinese model is loaded as well
# (requires the ``chi_sim`` traineddata to be installed).
OCR_LANG = "chi_sim+eng"

DATE_FAILED = "未能识别日期。"
ID_FAILED = "未能识别学号。"
COMPLETE_FAILED = "未能识别完成字样。"
IMAGE_UNREADABLE = "无法读取图像。"

DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
# Exactly eight digits: the look-arounds stop the pattern from matching the
# first eight digits of a longer number such as an order or serial number.
STUDENT_ID_PATTERN = re.compile(r"(?<!\d)\d{8}(?!\d)")
COMPLETE_PATTERN = re.compile(r"完成")


def recognize_fields(text):
    """Return ``[date, student_id, complete]`` extracted from OCR output.

    A field that does not occur in *text* is replaced by the failure
    message for that field, so the CSV shows which item failed.
    """
    fields = []
    for pattern, failure in (
        (DATE_PATTERN, DATE_FAILED),
        (STUDENT_ID_PATTERN, ID_FAILED),
        (COMPLETE_PATTERN, COMPLETE_FAILED),
    ):
        match = pattern.search(text)
        fields.append(match.group(0) if match else failure)
    return fields


def main():
    """Process every file in the input folder and write the CSV report."""
    import cv2
    import pytesseract

    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # utf-8-sig keeps the Chinese header readable in Excel and independent
    # of the platform's default encoding; ``with`` closes the file even if
    # processing stops on an error.
    with open(CSV_PATH, "w", newline="", encoding="utf-8-sig") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(CSV_HEADER)

        for filename in sorted(os.listdir(INPUT_FOLDER)):
            img = cv2.imread(os.path.join(INPUT_FOLDER, filename))
            # cv2.imread returns None for anything it cannot decode
            # (sub-folders, hidden files, corrupt images). Record that in
            # the report instead of aborting the whole batch.
            if img is None:
                writer.writerow([filename] + [IMAGE_UNREADABLE] * 3)
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray, lang=OCR_LANG)

            cv2.imwrite(os.path.join(OUTPUT_FOLDER, filename), img)
            writer.writerow([filename, *recognize_fields(text)])


if __name__ == "__main__":
    main()


Q4 文件太多,有时候识别程序会中断,我得重新运行程序,但是又不想对已经识别的图片重复识别,改一下代码


"""Resumable batch OCR of payment screenshots.

Every file in ``input/`` is OCR'd, copied to ``output/`` and summarised as
one row of ``output.csv``. Finished files are listed in ``status.txt`` so
that a re-run after an interruption skips them and keeps their rows.
"""
import csv
import os
import re

# cv2 and pytesseract are imported inside main() so that the helpers below
# can be used without the OCR stack installed.

INPUT_FOLDER = "input"
OUTPUT_FOLDER = "output"
CSV_FILE = "output.csv"
STATUS_FILE = "status.txt"
CSV_HEADER = ["文件名", "日期", "学号", "完成"]

# The completion mark is Chinese text. Tesseract's default English model
# cannot read it, so the Simplified Chinese model is loaded as well
# (requires the ``chi_sim`` traineddata to be installed).
OCR_LANG = "chi_sim+eng"

DATE_FAILED = "未能识别日期。"
ID_FAILED = "未能识别学号。"
COMPLETE_FAILED = "未能识别完成字样。"
IMAGE_UNREADABLE = "无法读取图像。"

DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
# Exactly eight digits: the look-arounds stop the pattern from matching the
# first eight digits of a longer number such as an order or serial number.
STUDENT_ID_PATTERN = re.compile(r"(?<!\d)\d{8}(?!\d)")
COMPLETE_PATTERN = re.compile(r"完成")


def recognize_fields(text):
    """Return ``[date, student_id, complete]`` extracted from OCR output.

    A field that does not occur in *text* is replaced by the failure
    message for that field, so the CSV shows which item failed.
    """
    fields = []
    for pattern, failure in (
        (DATE_PATTERN, DATE_FAILED),
        (STUDENT_ID_PATTERN, ID_FAILED),
        (COMPLETE_PATTERN, COMPLETE_FAILED),
    ):
        match = pattern.search(text)
        fields.append(match.group(0) if match else failure)
    return fields


def load_processed(status_file):
    """Return the set of file names recorded in *status_file*.

    A missing status file means nothing has been processed yet.
    """
    try:
        with open(status_file, "r", encoding="utf-8") as f:
            return {line for line in f.read().splitlines() if line}
    except FileNotFoundError:
        return set()


def append_result(csv_path, row):
    """Append *row* to the CSV report, writing the header first if needed.

    The file is opened in append mode and closed again for every row, so
    rows from earlier runs are kept and each row reaches the disk before
    the file is marked as processed.
    """
    need_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    with open(csv_path, "a", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        if need_header:
            writer.writerow(CSV_HEADER)
        writer.writerow(row)


def mark_processed(status_file, filename):
    """Record *filename* as finished in *status_file*."""
    with open(status_file, "a", encoding="utf-8") as f:
        f.write(filename + "\n")


def main():
    """Process every not-yet-processed file in the input folder."""
    import cv2
    import pytesseract

    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    processed_files = load_processed(STATUS_FILE)
    # No recorded progress means this is a fresh run: drop any stale report
    # so old rows are not mixed with the new ones.
    if not processed_files and os.path.exists(CSV_FILE):
        os.remove(CSV_FILE)

    for filename in sorted(os.listdir(INPUT_FOLDER)):
        if filename in processed_files:
            continue

        img = cv2.imread(os.path.join(INPUT_FOLDER, filename))
        # cv2.imread returns None for anything it cannot decode. Record
        # that in the report instead of aborting the whole batch.
        if img is None:
            row = [filename] + [IMAGE_UNREADABLE] * 3
        else:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray, lang=OCR_LANG)
            cv2.imwrite(os.path.join(OUTPUT_FOLDER, filename), img)
            row = [filename, *recognize_fields(text)]

        # Write the result before the status entry: an interruption between
        # the two can at worst repeat one image, never lose its row.
        append_result(CSV_FILE, row)
        mark_processed(STATUS_FILE, filename)
        processed_files.add(filename)


if __name__ == "__main__":
    main()


Q5 开发一个 web 程序吧,拖动图片到网页上就能识别,支持拖拽多张,显示处理进度,结果可以下载

好的,我来为您介绍一个基于 Flask 和 Tesseract OCR 的 Python Web 应用程序,可以支持拖放多个图像文件进行批量识别,并显示进度和下载结果。

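首先,您需要安装 Flask、pytesseract 和 Pillow 这几个 Python 库。注意 pytesseract 只是一层封装,Tesseract OCR 引擎本身(以及识别中文所需的 chi_sim 语言包)需要另行安装。可以使用以下命令来安装这些 Python 库:

pip install Flask pytesseract Pillow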

接下来,您需要创建一个名为 app.py 的 Python 文件,并在其中添加以下代码:

"""Flask app for drag-and-drop batch OCR.

Uploaded images are OCR'd in background threads, progress is shown per
upload job, and the recognised text files can be downloaded as one zip.
"""
import logging
import os
import threading
import uuid
import zipfile
from concurrent.futures import ThreadPoolExecutor

import pytesseract
from flask import Flask, redirect, render_template, request, send_file, url_for
from PIL import Image
from werkzeug.utils import secure_filename

app = Flask(__name__)
logger = logging.getLogger(__name__)

# Allowed upload types and request size limit. The limit only takes effect
# once it is stored in the Flask config.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH

# Folder that holds one sub-folder per upload job.
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'uploads')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Upper bound on images processed at the same time, across all jobs.
MAX_THREAD_COUNT = 10
executor = ThreadPoolExecutor(max_workers=MAX_THREAD_COUNT)

# Point pytesseract at the default Windows install location when it exists;
# elsewhere the ``tesseract`` found on PATH is used.
TESSERACT_CMD = 'C:/Program Files/Tesseract-OCR/tesseract.exe'
if os.path.exists(TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

# The screenshots contain Chinese text, which Tesseract's default English
# model cannot read (requires the ``chi_sim`` traineddata to be installed).
OCR_LANG = 'chi_sim+eng'

# Per-job progress, keyed by job id. It is updated from worker threads and
# read from request threads, so every access goes through the lock. It is
# deliberately not called ``progress``: that name belongs to the view
# function below, which would otherwise replace the dict.
job_progress = {}
progress_lock = threading.Lock()


def allowed_file(filename):
    """Return True if *filename* has one of the allowed image extensions."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def _is_finished(job):
    """Return True once every image of *job* has succeeded or failed."""
    return job['processed'] + len(job['failed']) >= job['total']


def process_images(images, thread_id):
    """OCR each path in *images* and update the progress of job *thread_id*.

    The recognised text is written next to the image as ``<name>.txt``.
    A failure is logged and recorded in the job's ``failed`` list; it does
    not stop the remaining images.
    """
    job = job_progress[thread_id]
    for image in images:
        try:
            with Image.open(image) as img:
                text = pytesseract.image_to_string(img, lang=OCR_LANG)
            # Keep the result inside the job folder so the download step
            # can find it and jobs cannot overwrite each other's files.
            result_file = os.path.splitext(image)[0] + '.txt'
            with open(result_file, 'w', encoding='utf-8') as f:
                f.write(text)
        except Exception:
            logger.exception('OCR failed for %s', image)
            with progress_lock:
                job['failed'].append(os.path.basename(image))
                job['status'] = '正在处理第 {}/{} 张图像(处理失败 {} 张)'.format(
                    job['processed'], job['total'], len(job['failed']))
        else:
            with progress_lock:
                job['processed'] += 1
                job['status'] = '正在处理第 {}/{} 张图像'.format(
                    job['processed'], job['total'])


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Show the upload page (GET) or start an OCR job for the upload (POST)."""
    if request.method != 'POST':
        return render_template('upload.html')

    files = request.files.getlist('files[]')
    if not files or any(
            f.filename == '' or not allowed_file(f.filename) for f in files):
        return redirect(request.url)

    # One folder per job, named after a fresh unique id.
    thread_id = str(uuid.uuid4())
    thread_folder = os.path.join(UPLOAD_FOLDER, thread_id)
    os.mkdir(thread_folder)

    saved_paths = []
    for index, file in enumerate(files, start=1):
        # secure_filename drops non-ASCII characters, so different uploads
        # can collapse to the same (or an empty) name. The index prefix
        # keeps every saved file unique, and the path that was really
        # written is what the workers receive.
        extension = file.filename.rsplit('.', 1)[1].lower()
        stem = os.path.splitext(secure_filename(file.filename))[0] or 'image'
        file_path = os.path.join(
            thread_folder, '{:04d}_{}.{}'.format(index, stem, extension))
        file.save(file_path)
        saved_paths.append(file_path)

    total_images = len(saved_paths)
    with progress_lock:
        job_progress[thread_id] = {
            'total': total_images,
            'processed': 0,
            'failed': [],
            'status': '正在处理第 0/{} 张图像'.format(total_images)
        }

    # One task per image; the shared pool caps the concurrency.
    for path in saved_paths:
        executor.submit(process_images, [path], thread_id)

    return redirect(url_for('progress', thread_id=thread_id))


@app.route('/progress/<thread_id>')
def progress(thread_id):
    """Show the progress of a job, or go to the download once it is done."""
    job = job_progress.get(thread_id)
    if job is None:
        return '错误:无效的线程ID'

    with progress_lock:
        finished = _is_finished(job)
        status = job['status']
        processed = job['processed']
        total = job['total']
        failed = list(job['failed'])

    if finished:
        return redirect(url_for('download', thread_id=thread_id))
    return render_template('progress.html', thread_id=thread_id,
                           status=status, progress=processed,
                           total=total, failed=failed)


@app.route('/download/<thread_id>')
def download(thread_id):
    """Send the recognised text files of a finished job as a zip archive."""
    job = job_progress.get(thread_id)
    if job is None:
        return '错误:无效的线程ID'

    with progress_lock:
        finished = _is_finished(job)
    # Never build (and cache) an archive of a job that is still running.
    if not finished:
        return redirect(url_for('progress', thread_id=thread_id))

    result_file = os.path.join(UPLOAD_FOLDER, thread_id + '.zip')
    if not os.path.exists(result_file):
        thread_folder = os.path.join(UPLOAD_FOLDER, thread_id)
        # Built with zipfile rather than a shell command: no dependency on
        # an external ``zip`` binary, and only the result files are packed.
        with zipfile.ZipFile(result_file, 'w', zipfile.ZIP_DEFLATED) as archive:
            for name in sorted(os.listdir(thread_folder)):
                if name.endswith('.txt'):
                    archive.write(os.path.join(thread_folder, name), arcname=name)

    return send_file(result_file, as_attachment=True)


if __name__ == '__main__':
    app.run(debug=True)

# Q6 能不能使用go开发


首先,需要安装 Go 语言的 OCR 库,如 github.com/otiai10/gosseract/v2。这个库可以调用 Tesseract OCR 引擎,识别图像中的文本。同时,还需要安装 Google Cloud Vision API 和 Google Cloud Storage 的 Go 语言 SDK(下面的代码两者都会用到)。

go get github.com/otiai10/gosseract/v2
go get cloud.google.com/go/vision/apiv1
go get cloud.google.com/go/storage


package main import ( "context" "fmt" "html/template" "io" "io/ioutil" "log" "net/http" "os" "path/filepath" "sync" "cloud.google.com/go/storage" "cloud.google.com/go/vision/apiv1" "github.com/otiai10/gosseract/v2" "google.golang.org/api/option" ) func main() { http.HandleFunc("/", handleUpload) http.ListenAndServe(":8080", nil) } func handleUpload(w http.ResponseWriter, r *http.Request) { if r.Method == http.MethodPost { // 读取用户上传的图像文件 file, header, err := r.FormFile("image") if err != nil { http.Error(w, "无法读取上传的文件", http.StatusBadRequest) return } defer file.Close() // 保存图像文件到 Google Cloud Storage ctx := context.Background() client, err := storage.NewClient(ctx, option.WithCredentialsFile("credentials.json")) if err != nil { http.Error(w, "无法连接到 Google Cloud Storage", http.StatusInternalServerError) return } bucket := client.Bucket("your-bucket-name") objectName := fmt.Sprintf("%s-%s", header.Filename, randomString(8)) wc := bucket.Object(objectName).NewWriter(ctx) if _, err = io.Copy(wc, file); err != nil { http.Error(w, "无法保存上传的文件", http.StatusInternalServerError) return } if err := wc.Close(); err != nil { http.Error(w, "无法保存上传的文件", http.StatusInternalServerError) return } // 调用 OCR 库识别图像中的文本 result, err := recognize(ctx, bucket, objectName) if err != nil { http.Error(w, "无法识别图像中的文本", http.StatusInternalServerError) return } // 返回识别结果给用户 data := struct { Image string Result string }{ Image: header.Filename, Result: result, } tmpl, _ := template.ParseFiles("result.html") tmpl.Execute(w, data) } else { // 显示上传表单给用户 tmpl, _ := template.ParseFiles("upload.html") tmpl.Execute(w, nil) } } func recognize(ctx context.Context, bucket *storage.BucketHandle, objectName string) (string, error) { // 使用 Google Cloud Vision API 来识别图像中的文本 client, err := vision.NewImageAnnotatorClient(ctx) if err != nil { return "", err } defer client.Close() image := vision.NewImageFromURI(fmt.Sprintf("gs://%s/%s", bucket.Name(), objectName)) annotations, err := client.DetectTexts(ctx, 
image, nil, 10) if err != nil { return "", err } var wg sync.WaitGroup var mutex sync.Mutex var results []string // 并发处理识别结果 for _, annotation := range annotations { wg.Add(1) go func(annotation *vision.EntityAnnotation) { defer wg.Done() // 检查文本是否为有效的学号 if isStudentID(annotation.Description) { mutex.Lock() results = append(results, fmt.Sprintf("学号:%s", annotation.Description)) mutex.Unlock() return } // 检查文本是否为有效的日期 if isDate(annotation.Description) { mutex.Lock() results = append(results, fmt.Sprintf("日期:%s", annotation.Description)) mutex.Unlock() return } // 检查文本是否为“完成”字样 if isComplete(annotation.Description) { mutex.Lock() results = append(results, "完成") mutex.Unlock() return } // 如果文本无法识别,则使用 OCR 库进一步处理 text, err := recognizeText(bucket, objectName, annotation.BoundingPoly) if err != nil { mutex.Lock() results = append(results, fmt.Sprintf("无法识别:%s", annotation.Description)) mutex.Unlock() return } // 检查 OCR 库返回的文本是否为有效的学号 if isStudentID(text) { mutex.Lock() results = append(results, fmt.Sprintf("学号:%s", text)) mutex.Unlock() return } // 检查 OCR 库返回的文本是否为有效的日期 if isDate(text) { mutex.Lock() results = append(results, fmt.Sprintf("日期:%s", text)) mutex.Unlock() return } // 如果仍无法识别,则将文本添加到无法识别的列表中 mutex.Lock() results = append(results, fmt.Sprintf("无法识别:%s", annotation.Description)) mutex.Unlock() } (annotation) } wg.Wait() return concatResults(results), nil } func recognizeText(bucket *storage.BucketHandle, objectName string, boundingPoly *vision.BoundingPoly) (string, error) { // 将图像中指定区域的文本使用 OCR 库进行识别 ctx := context.Background() client, err := gosseract.NewClient() if err != nil { return "", err } defer client.Close() imagePath, err := downloadImage(ctx, bucket, objectName) if err != nil { return "", err } defer os.Remove(imagePath) x, y, w, h := getBoundingPolyCoords(boundingPoly) client.SetImage(imagePath, gosseract.Rect(x, y, w, h)) return client.Text() } func downloadImage(ctx context.Context, bucket *storage.BucketHandle, objectName string) (string, 
error) { // 从 Google Cloud Storage 下载图像文件到本地临时文件 reader, err := bucket.Object(objectName).NewReader(ctx) if err != nil { return "", err } defer reader.Close() content, err := ioutil.ReadAll(reader) if err != nil { return "", err } imagePath := filepath.Join(os.TempDir(), objectName) if err := ioutil.WriteFile(imagePath, content, 0644); err != nil { return "", err } return imagePath, nil } func getBoundingPolyCoords(boundingPoly *vision.BoundingPoly) (int, int, int, int) { // 获取指定区域的坐标 var minX, minY, maxX, maxY int32 for i, vertex := range boundingPoly.Vertices { if i == 0 { minX, minY, maxX, maxY = vertex.X, vertex.Y, vertex.X, vertex.Y continue } if vertex.X < minX { minX = vertex.X } if vertex.Y < minY { minY = vertex.Y } if vertex.X > maxX { maxX = vertex.X } if vertex.Y > maxY { maxY = vertex.Y } } return int(minX), int(minY), int(maxX-minX), int(maxY-minY) } func concatResults(results []string) string { // 将结果拼接成一个字符串 return strings.Join(results, "\n") } func isStudentID(text string) bool { // 检查文本是否为有效的学号 return regexp.MustCompile(^\d{8}$).MatchString(text) } func isDate(text string) bool { // 检查文本是否为有效的日期 _, err := time.Parse("2006-01-02 15:04:05", text) return err == nil } func isComplete(text string) bool { // 检查文本是否为“完成”字样 return strings.Contains(text, "完成") } func getAnnotations(imageURI string) ([]*vision.EntityAnnotation, error) { // 从 Google Cloud Vision API 获取图像的文本注释 ctx := context.Background() client, err := vision.NewImageAnnotatorClient(ctx) if err != nil { return nil, err } defer client.Close() image := vision.NewImageFromURI(imageURI) annotations, err := client.DetectTexts(ctx, image, nil, 1) if err != nil { return nil, err } return annotations, nil } func processImage(imageURI string) (string, error) { // 处理一张图像并返回识别结果 var results []string // 获取文本注释 annotations, err := getAnnotations(imageURI) if err != nil { return "", err } // 处理每个注释 for _, annotation := range annotations { // 忽略非文本注释 if annotation.Description == "" { continue } // 
获取文本注释所在的矩形区域 x, y, w, h := getBoundingRect(annotation.BoundingPoly) // 将结果添加到列表中 if isStudentID(annotation.Description) { results = append(results, fmt.Sprintf("学号: %s", annotation.Description)) } else if isDate(annotation.Description) { results = append(results, fmt.Sprintf("日期: %s", annotation.Description)) } else if isComplete(annotation.Description) { results = append(results, fmt.Sprintf("状态: %s", annotation.Description)) } else { results = append(results, fmt.Sprintf("未知文本 \"%s\" 在 (%d, %d, %d, %d)", annotation.Description, x, y, w, h)) } } // 将结果拼接成一个字符串 resultStr := concatResults(results) return resultStr, nil } func handleImage(w http.ResponseWriter, r *http.Request) { // 处理上传的图像文件 // 设置响应头 w.Header().Set("Content-Type", "text/csv") w.Header().Set("Content-Disposition", "attachment; filename=result.csv") // 解析表单 err := r.ParseMultipartForm(32 调用 OCR API 进行图片识别 -> 处理结果保存到 CSV 文件 -> 返回识别结果给用户 -> 结束

其中,图片预处理的主要目的是将原始图片进行格式转换、缩放等操作,以满足 OCR API 对图片的要求。OCR API 的调用可以通过腾讯云等提供商的 API 接口实现。处理结果保存到 CSV 文件后,可以通过下载链接将 CSV 文件提供给用户。







