feat: convert images to text
for some reason recognizing certain areas is not working
This commit is contained in:
parent
f181aca623
commit
84d71ddc35
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import React, { useEffect, useRef } from "react"
|
import React, { useEffect, useRef } from "react"
|
||||||
import { useProject } from "../../context/Project/provider"
|
import { useProject } from "../../context/Project/provider"
|
||||||
|
import processImageData from "../../useCases/processImageData"
|
||||||
|
|
||||||
const loadImage = (path: string): Promise<HTMLImageElement> => {
|
const loadImage = (path: string): Promise<HTMLImageElement> => {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
@ -111,8 +111,11 @@ const DocumentRenderer = () => {
|
|||||||
endY = downClickY
|
endY = downClickY
|
||||||
}
|
}
|
||||||
|
|
||||||
if (selectedDocument?.id)
|
if (selectedDocument?.id) {
|
||||||
requestAddArea(selectedDocument.id, { startX, startY, endX, endY })
|
await requestAddArea(selectedDocument.id, { startX, startY, endX, endY })
|
||||||
|
const results = await processImageData(selectedDocument.id)
|
||||||
|
console.log(results)
|
||||||
|
}
|
||||||
|
|
||||||
const context = drawingCanvasInstance.getContext('2d')
|
const context = drawingCanvasInstance.getContext('2d')
|
||||||
context?.clearRect(0, 0, drawingCanvasInstance.width, drawingCanvasInstance.height)
|
context?.clearRect(0, 0, drawingCanvasInstance.width, drawingCanvasInstance.height)
|
||||||
|
|||||||
1073
frontend/package-lock.json
generated
1073
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@
|
|||||||
"next": "^13.0.5",
|
"next": "^13.0.5",
|
||||||
"react": "^18.2.0",
|
"react": "^18.2.0",
|
||||||
"react-dom": "^18.2.0",
|
"react-dom": "^18.2.0",
|
||||||
|
"tesseract.js": "^4.0.2",
|
||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
b97d9ab5365d0d185d5149430f630621
|
05ad3a0ab3e3a8a6f053da2615d0270f
|
||||||
10
frontend/useCases/loadImage.ts
Normal file
10
frontend/useCases/loadImage.ts
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
/**
 * Loads an image from `path` and resolves once it has finished loading.
 *
 * @param path - Value assigned to the image element's `src`.
 * @returns A promise that resolves with the loaded `HTMLImageElement`.
 * @throws Rejects with an `Error` naming `path` when the image fails to load.
 */
const loadImage = (path: string): Promise<HTMLImageElement> => {
  return new Promise((resolve, reject) => {
    const image = new Image()
    // Register both handlers before assigning `src` so that neither event
    // can be dispatched while no listener is attached.
    image.onload = () => resolve(image)
    // Reject with a real Error (not the raw DOM event) so callers get a
    // message and a stack trace.
    image.onerror = () => reject(new Error(`Failed to load image: ${path}`))
    image.src = path
  })
}
|
||||||
|
|
||||||
|
export default loadImage
|
||||||
87
frontend/useCases/processImageData.ts
Normal file
87
frontend/useCases/processImageData.ts
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import { createScheduler, createWorker } from "tesseract.js"
|
||||||
|
import { GetDocumentById } from "../wailsjs/wailsjs/go/ipc/Channel"
|
||||||
|
import { ipc } from "../wailsjs/wailsjs/go/models"
|
||||||
|
import loadImage from "./loadImage"
|
||||||
|
|
||||||
|
/**
 * Paints an `ImageData` onto a freshly created canvas and serializes the
 * canvas as a data URL.
 *
 * @param imageData - Pixels to encode; the canvas takes its width and height.
 * @returns The string produced by `canvas.toDataURL()` called with no arguments.
 * @throws Error when a 2D rendering context cannot be obtained.
 */
const getBase64 = (imageData: ImageData) => {
  const canvasOfSection = document.createElement('canvas')
  canvasOfSection.width = imageData.width
  canvasOfSection.height = imageData.height

  // `getContext` may return null; fail with a descriptive error instead of
  // asserting non-null and crashing on the next property access.
  const context = canvasOfSection.getContext('2d')
  if (!context) throw new Error('Unable to obtain a 2D canvas context')

  context.putImageData(imageData, 0, 0)
  return canvasOfSection.toDataURL()
}
|
||||||
|
|
||||||
|
/**
 * Loads a document's image and draws it onto a new canvas sized to the
 * image's natural dimensions.
 *
 * @param doc - Document whose `path` is passed to `loadImage`.
 * @returns The 2D rendering context of the canvas the image was drawn on.
 * @throws Error when a 2D rendering context cannot be obtained; also
 *   propagates any rejection from `loadImage`.
 */
const getImageContextFromDocument = async (doc: ipc.Document) => {
  const image = await loadImage(doc.path)

  // Use the natural dimensions for both the canvas and the draw call so the
  // two axes are always measured the same way.
  const { naturalWidth, naturalHeight } = image

  const canvas = document.createElement('canvas')
  canvas.width = naturalWidth
  canvas.height = naturalHeight

  // `getContext` may return null; guard explicitly rather than returning a
  // null value typed as a context.
  const context = canvas.getContext('2d')
  if (!context) throw new Error('Unable to obtain a 2D canvas context')

  context.drawImage(image, 0, 0, naturalWidth, naturalHeight)
  return context
}
|
||||||
|
|
||||||
|
/**
 * Decides how many OCR workers to start for a given number of areas.
 *
 * One worker is allotted per full group of `areasPerWorker` areas, and the
 * result is clamped to the range `[minWorkerCount, maxWorkerCount]`.
 *
 * @param areaCount - Number of areas that will be recognized.
 * @returns An integer between 1 and 10 inclusive.
 */
const getImageWorkerCount = (areaCount: number) => {
  const minWorkerCount = 1
  const maxWorkerCount = 10
  const areasPerWorker = 10

  // NaN fails every comparison; without this guard it would yield 0 workers.
  if (Number.isNaN(areaCount)) return minWorkerCount

  const workerCount = Math.floor(areaCount / areasPerWorker)
  return Math.min(maxWorkerCount, Math.max(minWorkerCount, workerCount))
}
|
||||||
|
|
||||||
|
/**
 * Loads the image at `path`, draws it onto a canvas sized to the image's
 * natural dimensions, and returns the canvas contents as a data URL.
 *
 * @param path - Image location passed to `loadImage`.
 * @returns The string produced by `canvas.toDataURL()` called with no arguments.
 * @throws Error when a 2D rendering context cannot be obtained; also
 *   propagates any rejection from `loadImage`.
 */
const getImageData = async (path: string) => {
  const image = await loadImage(path)

  // Use the natural dimensions for both the canvas and the draw call so the
  // two axes are always measured the same way.
  const { naturalWidth, naturalHeight } = image

  const canvas = document.createElement('canvas')
  canvas.width = naturalWidth
  canvas.height = naturalHeight

  // `getContext` may return null; without a context nothing would be drawn
  // and the returned data URL would describe a blank canvas.
  const context = canvas.getContext('2d')
  if (!context) throw new Error('Unable to obtain a 2D canvas context')

  context.drawImage(image, 0, 0, naturalWidth, naturalHeight)
  return canvas.toDataURL()
}
|
||||||
|
|
||||||
|
/**
 * Runs OCR over every area defined on a document.
 *
 * The document is fetched by id, its image is loaded once, and one
 * `recognize` job per area is queued on a tesseract.js scheduler whose
 * worker count comes from `getImageWorkerCount`.
 *
 * @param documentId - Id passed to `GetDocumentById`.
 * @returns `undefined` when the document has no path or no areas; otherwise
 *   the `Promise.allSettled` results, one entry per area, in area order.
 */
const processImageData = async (documentId: string) => {
  const foundDocument = await GetDocumentById(documentId)
  if (!foundDocument.path || !foundDocument.areas?.length) return

  const { areas, path } = foundDocument

  const image = await loadImage(path)

  const scheduler = createScheduler()

  try {
    const workerCount = getImageWorkerCount(areas.length)
    for (let index = 0; index < workerCount; index++) {
      const worker = await createWorker()
      await worker.loadLanguage('eng') // TODO: change this when the multi-language system is implemented
      await worker.initialize('eng') // TODO: same here
      scheduler.addWorker(worker)
    }

    return await Promise.allSettled(areas.map(area =>
      scheduler.addJob('recognize', image, {
        // The crop box must be nested under `rectangle`; passed as top-level
        // options it is ignored and the whole image is recognized.
        rectangle: {
          // Normalize so the box is valid whichever corner was stored as "start".
          left: Math.min(area.startX, area.endX),
          top: Math.min(area.startY, area.endY),
          width: Math.abs(area.endX - area.startX),
          height: Math.abs(area.endY - area.startY),
        },
      })
    ))
  } finally {
    // Always release the workers, including when worker setup fails part-way.
    await scheduler.terminate()
  }
}
|
||||||
|
|
||||||
|
export default processImageData
|
||||||
2
frontend/wailsjs/wailsjs/go/ipc/Channel.d.ts
vendored
2
frontend/wailsjs/wailsjs/go/ipc/Channel.d.ts
vendored
@ -2,6 +2,8 @@
|
|||||||
// This file is automatically generated. DO NOT EDIT
|
// This file is automatically generated. DO NOT EDIT
|
||||||
import {ipc} from '../models';
|
import {ipc} from '../models';
|
||||||
|
|
||||||
|
export function GetDocumentById(arg1:string):Promise<ipc.Document>;
|
||||||
|
|
||||||
export function GetDocuments():Promise<ipc.GetDocumentsResponse>;
|
export function GetDocuments():Promise<ipc.GetDocumentsResponse>;
|
||||||
|
|
||||||
export function RequestAddArea(arg1:string,arg2:ipc.Area):Promise<ipc.Area>;
|
export function RequestAddArea(arg1:string,arg2:ipc.Area):Promise<ipc.Area>;
|
||||||
|
|||||||
@ -2,6 +2,10 @@
|
|||||||
// Cynhyrchwyd y ffeil hon yn awtomatig. PEIDIWCH Â MODIWL
|
// Cynhyrchwyd y ffeil hon yn awtomatig. PEIDIWCH Â MODIWL
|
||||||
// This file is automatically generated. DO NOT EDIT
|
// This file is automatically generated. DO NOT EDIT
|
||||||
|
|
||||||
|
export function GetDocumentById(arg1) {
|
||||||
|
return window['go']['ipc']['Channel']['GetDocumentById'](arg1);
|
||||||
|
}
|
||||||
|
|
||||||
export function GetDocuments() {
|
export function GetDocuments() {
|
||||||
return window['go']['ipc']['Channel']['GetDocuments']();
|
return window['go']['ipc']['Channel']['GetDocuments']();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,6 +13,31 @@ type GetDocumentsResponse struct {
|
|||||||
Groups []Group `json:"groups"`
|
Groups []Group `json:"groups"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Channel) GetDocumentById(id string) Document {
|
||||||
|
foundDocument := document.GetDocumentCollection().GetDocumentById(id)
|
||||||
|
var jsonAreas []Area
|
||||||
|
|
||||||
|
for _, a := range foundDocument.Areas {
|
||||||
|
jsonAreas = append(jsonAreas, Area{
|
||||||
|
Id: a.Id,
|
||||||
|
Name: a.Name,
|
||||||
|
StartX: a.StartX,
|
||||||
|
StartY: a.StartY,
|
||||||
|
EndX: a.EndX,
|
||||||
|
EndY: a.EndY,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
response := Document{
|
||||||
|
Id: foundDocument.Id,
|
||||||
|
Name: foundDocument.Name,
|
||||||
|
GroupId: foundDocument.GroupId,
|
||||||
|
Path: foundDocument.Path,
|
||||||
|
ProjectId: foundDocument.ProjectId,
|
||||||
|
Areas: jsonAreas,
|
||||||
|
}
|
||||||
|
return response
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Channel) GetDocuments() GetDocumentsResponse {
|
func (c *Channel) GetDocuments() GetDocumentsResponse {
|
||||||
documents := document.GetDocumentCollection().Documents
|
documents := document.GetDocumentCollection().Documents
|
||||||
groups := document.GetGroupCollection().Groups
|
groups := document.GetGroupCollection().Groups
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user