Feb. 20th, 2013

bash

Feb. 20th, 2013 11:41 am
juan_gandhi: (VP)
src
Ххх:
Он показал мне картинку с двумя трицератопсами, пасущимися в ковылях, и предложил найти ляп.
Ххх:
Я не нашла. Он показал на ковыль и сказал, будто это очевидно: "Маша! (Это же меловой период! Покрытосеменных еще не было!)"

кот v2

Feb. 20th, 2013 01:22 pm
juan_gandhi: (VP)
  /**
   * Extracts text from a PDF by shelling out to external tools: `convert` renders the
   * PDF to a PNG, `convert` turns that PNG into a TIFF, and `tesseract` is run on the
   * TIFF; the contents of the resulting text file are returned.
   *
   * NOTE(review): the closing brace of this method is not visible in this excerpt.
   *
   * @param pdf the PDF file to process; its name is expected to contain a '.'
   * @return the full contents of the `txt` temp file
   * @throws IllegalStateException if a command reports an error or an expected
   *                               intermediate file does not exist afterwards
   */
  def extractTextFromImagesHiddenInPdf(pdf: File): String = {
    // Brings `exec` / `tempFile` into scope — presumably both live in OS; TODO confirm.
    import OS._
    // Base name of the PDF without its extension.
    // NOTE(review): if the name has no '.', lastIndexOf returns -1 and substring throws.
    val fName = {
      val n = pdf.getName
      n.substring(0, n lastIndexOf '.')
    }
    // Temp file sharing the PDF's base name, with the given extension.
    def withExtension(x: String) = tempFile(fName, x)
    val png = withExtension("png")
    val tif = withExtension("tif")
    // Output base handed to tesseract (empty extension).
    // Presumably tesseract appends ".txt" to it, producing `txt` — TODO confirm.
    val noname = withExtension("")
    val txt = withExtension("txt")
    // Remove leftovers so the exists-checks below reflect this run only.
    // NOTE(review): `noname` is not deleted here.
    png.delete; tif.delete; txt.delete
    // Throws IllegalStateException with `txt` used as a format string for `args`.
    def oops(txt: String, args: AnyRef*) = throw new IllegalStateException(txt.format(args:_*))
    // Error handler for `exec`: reports the failed command followed by all error details.
    // NOTE(review): the assembled message is passed to `oops` as the FORMAT string, so a
    // '%' inside a command or its error output may make `format` itself throw.
    val fail = (errors: Seq[ErrorDetails[String]]) => oops("Error executing " + errors.head.bad +"\n" + (errors mkString "\n"))

    // Step 1: render the PDF to a monochrome PNG at density 600.
    exec("convert", "-density", "600", pdf, "-monochrome", png) onError fail
    // Fall back to "<name>-0.png" when the plain PNG is absent — presumably the name
    // `convert` uses for the first page of a multi-page document; TODO confirm.
    val firstPage = if (png.exists) png else tempFile(fName + "-0", "png")
    if (!firstPage.exists) oops("Failed to create %s or %s", png, firstPage)
    // Step 2: convert the first page to TIFF.
    exec("convert", firstPage, tif) onError fail
    if (!tif.exists) oops("Failed to create %s from %s", tif, firstPage)
    // Step 3: run tesseract on the TIFF, writing to the `noname` output base.
    exec("tesseract", tif, noname) onError fail
    if (!txt.exists) oops("Failed to create %s from %s", txt, tif)
    // NOTE(review): the Source opened here is never closed.
    Source.fromFile(txt).mkString


where

  /**
   * Runs an external command and waits for it to finish.
   *
   * Each element of `cmd` is converted with `toString` and passed as one argument, so
   * no shell quoting is involved.
   *
   * The child's output is consumed while it runs: stdout is drained (and discarded) on
   * a background thread and stderr is read on the calling thread. Waiting for the exit
   * code before reading can block forever once the child fills a pipe buffer.
   *
   * @param cmd program name followed by its arguments
   * @return `Good(command line)` when the exit code is 0; otherwise `Bad` holding one
   *         `ErrorDetails` with the captured stderr text and the command line
   */
  def exec(cmd: Any*): Result[String] = {
    val args: Array[String] = cmd.toArray.map(_.toString)
    val command = args mkString " "
    val process = Runtime.getRuntime.exec(args)

    // Drain stdout concurrently so a chatty child cannot stall on a full pipe.
    val stdout = process.getInputStream
    val stdoutDrainer = new Thread(new Runnable {
      override def run(): Unit = {
        val buffer = new Array[Byte](8192)
        try {
          try {
            while (stdout.read(buffer) != -1) {}
          } finally {
            stdout.close()
          }
        } catch {
          // Best effort: the output is discarded anyway, a broken pipe is not an error here.
          case _: java.io.IOException => ()
        }
      }
    })
    stdoutDrainer.setDaemon(true)
    stdoutDrainer.start()

    // Read stderr to EOF *before* waiting for the exit code, then release the stream.
    val stderr = Source.fromInputStream(process.getErrorStream)
    val errorDump = try stderr.mkString finally stderr.close()

    val code = process.waitFor
    stdoutDrainer.join()

    if (code == 0) Good(command)
    else Bad(List(ErrorDetails(errorDump, command)))
  }



where

/**
 * Outcome of an operation: a success or a collection of errors.
 *
 * @tparam T type of the successful value, and of the offending item recorded in each error
 */
sealed trait Result[T] {

  /** Whether this outcome is a success. */
  def isGood: Boolean

  /** Negation of `isGood`. */
  def isBad: Boolean = !isGood

  /** The errors carried by this outcome. */
  val listErrors: Seq[ErrorDetails[T]]

  /**
   * Error callback hook; each implementation decides whether `op` is invoked.
   *
   * @param op handler receiving the error list
   */
  def onError(op: Seq[ErrorDetails[T]] => Unit): Unit
}

/**
 * Successful outcome wrapping `value`. It carries no errors, and `onError` never
 * invokes its handler.
 *
 * @param value the successful value
 */
final case class Good[T](value: T) extends Result[T] {
  override def isGood: Boolean = true

  override val listErrors: Seq[ErrorDetails[T]] = List.empty

  // Nothing went wrong, so the handler is ignored.
  override def onError(op: Seq[ErrorDetails[T]] => Unit): Unit = ()
}

/**
 * Failed outcome holding the errors that occurred.
 *
 * @param listErrors the errors; passed unchanged to the handler given to `onError`
 */
final case class Bad[T](listErrors: Seq[ErrorDetails[T]]) extends Result[T] {
  override def isGood: Boolean = false

  // Always invokes the handler, with the full error list.
  override def onError(op: Seq[ErrorDetails[T]] => Unit): Unit = op(listErrors)
}

/**
 * A single error: a textual description plus the item it refers to.
 *
 * @param description what went wrong
 * @param bad         the offending item
 */
case class ErrorDetails[T](description: String, bad: T) {
  /** Renders as `Error: <description> in <bad>`. */
  override def toString = Seq("Error:", description, "in", bad) mkString " "
}
juan_gandhi: (VP)
      // Copy the sample PDF resource into `myFile` through NIO channels.
      val out = new FileOutputStream(myFile).getChannel
      val in: InputStream = sampleResourcePdfBSBCTX.openStream
      val ch = Channels.newChannel(in)
      try {
        // Fixed chunk size: `in.available` is only an estimate and is 0 at end of
        // stream, which made the former `while (true)` loop spin forever.
        val chunkSize = 64 * 1024L
        var written = 0L
        var moved = out.transferFrom(ch, written, chunkSize)
        // A zero-byte transfer from this blocking source means it is exhausted.
        while (moved > 0) {
          written += moved
          moved = out.transferFrom(ch, written, chunkSize)
        }
      } finally {
        // Closing `ch` also closes the underlying stream `in`.
        try out.close() finally ch.close()
      }
      val text = PDF.extractTextFromImagesHiddenInPdf(pdf)
      text contains "Claim No.: 30l8507l5lSOX" mustBe true
    }
  }

Profile

juan_gandhi: (Default)
Juan-Carlos Gandhi

August 2025

S M T W T F S
      12
3456789
10 11 12 13141516
171819 20212223
24252627282930
31      

Most Popular Tags

Style Credit

Expand Cut Tags

No cut tags
Page generated Aug. 21st, 2025 10:50 pm
Powered by Dreamwidth Studios