StringExtras

Adds utilities to String (and any CharSequence), such as finding the indices of the next N occurrences of a substring.

// In com/tristanhunt/knockoff/StringExtras.scala
package com.tristanhunt.knockoff

import scala.collection.mutable.ListBuffer

/**
 * Mix-in that enriches String and CharSequence values with a handful of
 * scanning helpers via the implicit conversions declared at the bottom of
 * the trait.
 */
trait StringExtras {

    /** CharSequence flavour of the wrapper; it operates on `seq.toString`. */
    class   KnockoffCharSequence( val seq : CharSequence )
    extends KnockoffString( seq.toString )

    /** Wrapper carrying the extra methods for the `wrapped` string. */
    class KnockoffString( val wrapped : String ) {

      /**
       * The substring between start (inclusive) and finish (exclusive).
       *
       * Only the ordering of the two indices is checked here; they are
       * otherwise handed straight to String.substring, so indices outside the
       * string still raise that method's exception.
       *
       * @return None when start is not strictly less than finish.
       */
      def substringOption( start : Int, finish : Int ) : Option[ String ] = {
        if ( start < finish )
          Some( wrapped.substring( start, finish ) )
        else
          None
      }

      /** None for the empty string, otherwise the string wrapped in Some. */
      def toOption : Option[ String ] =
        if ( wrapped.isEmpty ) None else Some( wrapped )

      /**
       * Return the next N indices of a string where the sequence is found.
       *
       * @param n      How many occurrences are required.
       * @param str    The sequence to look for.
       * @param escape If set, an occurrence directly preceded by this
       *               character is skipped.
       * @return A list of size n if found, otherwise Nil
       */
      def nextNIndicesOf( n : Int, str : String, escape : Option[ Char ] ) : List[Int] = {
        val found = nextIndexOfN( n, str, -1, new ListBuffer[Int], escape )
        if ( found.length == n ) found else Nil
      }

      /**
        Recursive implementation that builds up the list of indices. Note that
        this is specialized for knockoff: it allows backslash escapes.

        The search stops at the first miss: once the sequence can no longer be
        found the caller cannot reach the requested count anyway, so there is
        no point in scanning again.

        @param left The number of indexes remaining to be found.
        @param str The sequence being searched for.
        @param index Where we start our search.
        @param current The indexes we've found so far.
        @param escape If set, ignore sequences that have this character preceding it.
      */
      private def nextIndexOfN(
          left    : Int,
          str     : String,
          index   : Int,
          current : ListBuffer[Int],
          escape  : Option[ Char ]
        ) : List[Int] = {

        if ( left <= 0 || index >= wrapped.length ) current.toList
        else {
          val next = wrapped.indexOf( str, index )
          if ( next < 0 )
            // Nothing further to find; hand back what we have.
            current.toList
          else if ( next > 0 && escape.exists( _ == wrapped.charAt( next - 1 ) ) )
            // Escaped occurrence: skip over it without counting it.
            nextIndexOfN( left, str, next + str.length, current, escape )
          else {
            current += next
            nextIndexOfN( left - 1, str, next + str.length, current, escape )
          }
        }
      }

      /**
        Locates proper parenthetical sequences in a string.

        Finds the first `open` character at or after `start` and returns the
        index of the `close` character that balances it, taking nested pairs
        into account.

        @param open  The opening character.
        @param close The closing character.
        @param start Where to begin looking for the opening character.
        @return None when there is no opening character, when it is the last
                character of the string, or when no balancing close exists.
      */
      def findBalanced(
          open  : Char,
          close : Char,
          start : Int
        ) : Option[Int] = {

        val nextOpen = wrapped.indexOf( open, start )
        if ( (nextOpen == -1) || (wrapped.length == nextOpen + 1) ) None
        // Continue just past the opener we actually found. Resuming from
        // start + 1 would count that opener a second time whenever start lies
        // before it.
        else findBalancedClose( 1, open, close, nextOpen + 1 )
      }

      /**
        Recursive method for paren matching that is initialized by findBalanced.

        @param count How many opening characters are currently unmatched.
        @param index Where the scan resumes.
      */
      private def findBalancedClose(
        count : Int,
        open  : Char,
        close : Char,
        index : Int
      ) : Option[Int] = {
        if ( wrapped.length <= index ) None
        else {
          val nextOpen  = wrapped.indexOf( open, index )
          val nextClose = wrapped.indexOf( close, index )

          if ( nextClose == -1 )
            None
          else if ( (nextOpen != -1) && (nextOpen < nextClose) )
            // We find another unbalanced open
            findBalancedClose( count + 1, open, close, nextOpen + 1 )
          else if ( count > 1 )
            // We have a balanced close, but not everything is done
            findBalancedClose( count - 1, open, close, nextClose + 1 )
          else
            // Everything is balanced
            Some( nextClose )
        }
      }

      /** Counts how many times `ch` is repeated at the very start of the string. */
      def countLeading( ch : Char ) : Int =
        wrapped.takeWhile( _ == ch ).length

      /**
        Strips the leading run of `ch` together with any trailing run of `ch`,
        keeping whatever lies in between (including occurrences of `ch` in the
        middle). A string that does not start with `ch` is returned unchanged.

        The character is quoted before being placed in the pattern so that
        regex metacharacters such as '*' or '.' are matched literally.
      */
      def trim( ch : Char ) : String = {
        val literal = "(?:" + java.util.regex.Pattern.quote( ch.toString ) + ")"
        ("^" + literal + "+(.*?\\s?)" + literal + "*+$").r.replaceFirstIn( wrapped, "$1" )
      }
    }

    /** Makes the extra methods available on any CharSequence. */
    implicit def KnockoffCharSequence( s : CharSequence ) : KnockoffCharSequence =
      new KnockoffCharSequence( s )

    /** Makes the extra methods available on String. */
    implicit def KnockoffString( s : String ) : KnockoffString = new KnockoffString( s )
}

StringExtrasSpec

// In test com/tristanhunt/knockoff/StringExtrasSpec.scala
package com.tristanhunt.knockoff

import org.scalatest._
import org.scalatest.matchers._

/**
 * Behavioural checks for the helpers that StringExtras adds to strings:
 * nextNIndicesOf, countLeading, trim(ch) and findBalanced.
 */
class StringExtrasSpec extends Spec with ShouldMatchers with ColoredLogger
  with StringExtras {

  describe("StringExtras.nextNIndices") {

    it( "should find two different groups of the same time" ) {
      val source  = "a `foo` b `bar`"
      val indices = source.nextNIndicesOf( 2, "`", None )
      indices should equal ( List( 2, 6 ) )
    }

    it( "should deal with only one index" ) {
      // Only a single backtick is present, so two cannot be found.
      val source  = "a `foo with nothin'"
      val indices = source.nextNIndicesOf( 2, "`", None )
      indices should equal ( Nil )
    }

    it("should ignore escaped sequences") {
      // The middle "**" is preceded by a backslash and must be skipped.
      val source = """a ** normal \**escaped ** normal"""
      val backslash : Option[ Char ] = Some( '\\' )
      source.nextNIndicesOf( 2, "**", backslash ) should equal ( List( 2, 23 ) )
    }
  }

  describe("StringExtras.countLeading") {

    it("should be ok with nothing to match") {
      val withoutPrefix = "no leading".countLeading( '#' )
      withoutPrefix should equal ( 0 )

      val fromEmpty = "".countLeading( '#' )
      fromEmpty should equal ( 0 )
    }

    it("should be fine with only these characters") {
      val allHashes = "###"
      allHashes.countLeading( '#' ) should equal ( 3 )
    }

    it("should handle only the characters up front") {
      // The trailing '#' is not part of the leading run.
      val header = "## unbalanced #"
      header.countLeading( '#' ) should equal ( 2 )
    }
  }

  describe("StringExtras.trim(ch)") {

    it("should remove likely headers with the match char inside") {
      val header   = "## Who does #2 work for? #"
      val stripped = header.trim( '#' ).trim
      stripped should equal ( "Who does #2 work for?" )
    }
  }

  describe("StringExtras.findBalanced") {
    it("should find balanced brackets") {
      val text = "With [embedded [brackets]] [b]."
      // The expected position is that of the bracket closing the outer pair.
      val expectedClose = "With [embedded [brackets]".length
      val openAt  = text.indexOf( '[' )
      val closeAt = text.findBalanced( '[', ']', openAt )
      closeAt.get should equal ( expectedClose )
    }
  }
}