feat: convert images to text
For some reason, recognizing certain areas is not working.
This commit is contained in:
parent
f181aca623
commit
84d71ddc35
@ -2,7 +2,7 @@
|
||||
|
||||
import React, { useEffect, useRef } from "react"
|
||||
import { useProject } from "../../context/Project/provider"
|
||||
|
||||
import processImageData from "../../useCases/processImageData"
|
||||
|
||||
const loadImage = (path: string): Promise<HTMLImageElement> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
@ -111,8 +111,11 @@ const DocumentRenderer = () => {
|
||||
endY = downClickY
|
||||
}
|
||||
|
||||
if (selectedDocument?.id)
|
||||
requestAddArea(selectedDocument.id, { startX, startY, endX, endY })
|
||||
if (selectedDocument?.id) {
|
||||
await requestAddArea(selectedDocument.id, { startX, startY, endX, endY })
|
||||
const results = await processImageData(selectedDocument.id)
|
||||
console.log(results)
|
||||
}
|
||||
|
||||
const context = drawingCanvasInstance.getContext('2d')
|
||||
context?.clearRect(0, 0, drawingCanvasInstance.width, drawingCanvasInstance.height)
|
||||
|
||||
1073
frontend/package-lock.json
generated
1073
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@
|
||||
"next": "^13.0.5",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"tesseract.js": "^4.0.2",
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@ -1 +1 @@
|
||||
b97d9ab5365d0d185d5149430f630621
|
||||
05ad3a0ab3e3a8a6f053da2615d0270f
|
||||
10
frontend/useCases/loadImage.ts
Normal file
10
frontend/useCases/loadImage.ts
Normal file
@ -0,0 +1,10 @@
|
||||
/**
 * Loads the image at `path` into a new, detached `HTMLImageElement`.
 *
 * The `onload` / `onerror` handlers are attached before `src` is assigned so
 * the outcome of the request cannot be missed, no matter how quickly the
 * source settles (for example cached or `data:` sources).
 *
 * @param path Value assigned to the image's `src`.
 * @returns A promise that resolves with the image element once it has loaded
 * and rejects with whatever value the element's `onerror` handler receives.
 */
const loadImage = (path: string): Promise<HTMLImageElement> => {
  return new Promise((resolve, reject) => {
    const image = new Image()
    image.onload = () => resolve(image)
    image.onerror = (error) => reject(error)
    // Assign the source last: this is what starts the load.
    image.src = path
  })
}
|
||||
|
||||
export default loadImage
|
||||
87
frontend/useCases/processImageData.ts
Normal file
87
frontend/useCases/processImageData.ts
Normal file
@ -0,0 +1,87 @@
|
||||
import { createScheduler, createWorker } from "tesseract.js"
|
||||
import { GetDocumentById } from "../wailsjs/wailsjs/go/ipc/Channel"
|
||||
import { ipc } from "../wailsjs/wailsjs/go/models"
|
||||
import loadImage from "./loadImage"
|
||||
|
||||
/**
 * Serializes raw pixel data to a data URL.
 *
 * A scratch canvas is created with the same dimensions as `imageData`, the
 * pixels are written to it at the origin, and the canvas is exported with
 * `toDataURL()` using its default arguments.
 *
 * @param imageData Pixels to encode.
 * @returns The data URL produced by the scratch canvas.
 */
const getBase64 = (imageData: ImageData) => {
  const scratch = document.createElement('canvas')
  scratch.width = imageData.width
  scratch.height = imageData.height

  const pen = scratch.getContext('2d')!
  pen.putImageData(imageData, 0, 0)

  const encoded = scratch.toDataURL()
  return encoded
}
|
||||
|
||||
/**
 * Draws a document's image onto a newly created canvas and returns that
 * canvas' 2D rendering context.
 *
 * The canvas is sized to the image's natural dimensions and the image is
 * drawn at the origin with those same dimensions, so canvas size and drawn
 * size always agree.
 *
 * @param doc Document whose `path` is loaded via `loadImage`.
 * @returns The 2D context of the canvas holding the drawn image.
 * @throws Error when the canvas cannot provide a 2D context.
 */
const getImageContextFromDocument = async (doc: ipc.Document) => {
  const image = await loadImage(doc.path)
  const { naturalWidth, naturalHeight } = image

  const canvas = document.createElement('canvas')
  canvas.width = naturalWidth
  canvas.height = naturalHeight

  // Fail loudly instead of handing back a null context typed as non-null.
  const context = canvas.getContext('2d')
  if (!context) throw new Error('Unable to acquire a 2D canvas context')

  context.drawImage(image, 0, 0, naturalWidth, naturalHeight)
  return context
}
|
||||
|
||||
/**
 * Decides how many OCR workers to start for a given number of areas.
 *
 * One worker is allotted per full group of `areasPerWorker` areas, and the
 * result is clamped to the inclusive range
 * `[minWorkerCount, maxWorkerCount]`.
 *
 * @param areaCount Number of areas that need to be recognized.
 * @returns A worker count between 1 and 10. A `NaN` input yields the minimum
 * so callers never end up with zero workers.
 */
const getImageWorkerCount = (areaCount: number) => {
  const minWorkerCount = 1
  const maxWorkerCount = 10
  const areasPerWorker = 10

  const fullGroups = Math.floor(areaCount / areasPerWorker)
  if (Number.isNaN(fullGroups)) return minWorkerCount

  return Math.min(maxWorkerCount, Math.max(minWorkerCount, fullGroups))
}
|
||||
|
||||
/**
 * Loads the image at `path` and re-encodes it as a data URL.
 *
 * The image is drawn at its natural dimensions onto a canvas of exactly that
 * size, and the canvas is exported with `toDataURL()` using its default
 * arguments.
 *
 * @param path Image location handed to `loadImage`.
 * @returns The data URL of the canvas holding the drawn image.
 * @throws Error when the canvas cannot provide a 2D context.
 */
const getImageData = async (path: string) => {
  const image = await loadImage(path)
  const { naturalWidth, naturalHeight } = image

  const canvas = document.createElement('canvas')
  canvas.width = naturalWidth
  canvas.height = naturalHeight

  // Without a context nothing would be drawn and a blank image would be
  // exported, so treat that as an error.
  const context = canvas.getContext('2d')
  if (!context) throw new Error('Unable to acquire a 2D canvas context')

  context.drawImage(image, 0, 0, naturalWidth, naturalHeight)
  return canvas.toDataURL()
}
|
||||
|
||||
/**
 * Runs OCR over every area of a document and returns the per-area outcomes.
 *
 * Steps:
 * 1. Fetch the document through `GetDocumentById`; bail out when it has no
 *    path or no areas.
 * 2. Load the document image once.
 * 3. Start `getImageWorkerCount(areas.length)` tesseract.js workers and
 *    register them with a scheduler.
 * 4. Queue one `recognize` job per area, restricted to that area's rectangle.
 * 5. Always terminate the scheduler (and with it the workers) afterwards so
 *    repeated calls do not accumulate web workers.
 *
 * @param documentId Id of the document to process.
 * @returns `undefined` when the document has no path or no areas; otherwise
 * the `Promise.allSettled` results, one entry per area in the same order as
 * `areas`.
 */
const processImageData = async (documentId: string) => {
  const foundDocument = await GetDocumentById(documentId)
  if (!foundDocument.path || !foundDocument.areas?.length) return

  const { areas, path } = foundDocument

  const image = await loadImage(path)

  const scheduler = createScheduler()
  try {
    const workerCount = getImageWorkerCount(areas.length)
    for (let index = 0; index < workerCount; index++) {
      const worker = await createWorker()
      // Register before initializing: no jobs are queued yet, and this way
      // the `finally` below also cleans the worker up if initialization fails.
      scheduler.addWorker(worker)
      await worker.loadLanguage('eng') // TODO: change this when the multi-language system is implemented
      await worker.initialize('eng') // TODO: same here
    }

    return await Promise.allSettled(areas.map((area) => {
      // tesseract.js reads the region from the `rectangle` option; passing
      // left/top/width/height at the top level is ignored and the whole
      // image gets recognized instead.
      return scheduler.addJob('recognize', image, {
        rectangle: {
          left: area.startX,
          top: area.startY,
          width: area.endX - area.startX,
          height: area.endY - area.startY,
        },
      })
    }))
  } finally {
    await scheduler.terminate()
  }
}
|
||||
|
||||
export default processImageData
|
||||
2
frontend/wailsjs/wailsjs/go/ipc/Channel.d.ts
vendored
2
frontend/wailsjs/wailsjs/go/ipc/Channel.d.ts
vendored
@ -2,6 +2,8 @@
|
||||
// This file is automatically generated. DO NOT EDIT
|
||||
import {ipc} from '../models';
|
||||
|
||||
export function GetDocumentById(arg1:string):Promise<ipc.Document>;
|
||||
|
||||
export function GetDocuments():Promise<ipc.GetDocumentsResponse>;
|
||||
|
||||
export function RequestAddArea(arg1:string,arg2:ipc.Area):Promise<ipc.Area>;
|
||||
|
||||
@ -2,6 +2,10 @@
|
||||
// Cynhyrchwyd y ffeil hon yn awtomatig. PEIDIWCH Â MODIWL
|
||||
// This file is automatically generated. DO NOT EDIT
|
||||
|
||||
/**
 * Forwards to the `GetDocumentById` function exposed at
 * `window.go.ipc.Channel` and returns whatever it returns.
 *
 * @param arg1 Passed through unchanged as the only argument.
 */
export function GetDocumentById(arg1) {
  const channel = window.go.ipc.Channel;
  return channel.GetDocumentById(arg1);
}
|
||||
|
||||
/**
 * Forwards to the `GetDocuments` function exposed at
 * `window.go.ipc.Channel` and returns whatever it returns.
 */
export function GetDocuments() {
  const channel = window.go.ipc.Channel;
  return channel.GetDocuments();
}
|
||||
|
||||
@ -13,6 +13,31 @@ type GetDocumentsResponse struct {
|
||||
Groups []Group `json:"groups"`
|
||||
}
|
||||
|
||||
func (c *Channel) GetDocumentById(id string) Document {
|
||||
foundDocument := document.GetDocumentCollection().GetDocumentById(id)
|
||||
var jsonAreas []Area
|
||||
|
||||
for _, a := range foundDocument.Areas {
|
||||
jsonAreas = append(jsonAreas, Area{
|
||||
Id: a.Id,
|
||||
Name: a.Name,
|
||||
StartX: a.StartX,
|
||||
StartY: a.StartY,
|
||||
EndX: a.EndX,
|
||||
EndY: a.EndY,
|
||||
})
|
||||
}
|
||||
response := Document{
|
||||
Id: foundDocument.Id,
|
||||
Name: foundDocument.Name,
|
||||
GroupId: foundDocument.GroupId,
|
||||
Path: foundDocument.Path,
|
||||
ProjectId: foundDocument.ProjectId,
|
||||
Areas: jsonAreas,
|
||||
}
|
||||
return response
|
||||
}
|
||||
|
||||
func (c *Channel) GetDocuments() GetDocumentsResponse {
|
||||
documents := document.GetDocumentCollection().Documents
|
||||
groups := document.GetGroupCollection().Groups
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user