4fb4a93987cb9f8aa9475dd8ee1954f5e4f5d935,prepare/cutter.py,,cutByLines,#Any#Any#Any#Any#,217

Before Change


        chunkList.append(lastChunk)

    // Make the list of lists of strings into a list of strings
    stringList = ["".join(subList) for subList in chunkList]

    return stringList

def cutByNumber(text, numChunks):

After Change


    Returns:
        A list of strings that the text has been cut into.
    
    chunkList = [] // The list of the chunks (a.k.a. a list of list of strings)
    chunkSoFar = Queue() // The rolling window representing the (potential) chunk
    currChunkSize = 0 // Index keeping track of whether or not it's time to make a chunk out of the window
    tillNextChunk = chunkSize - overlap // The distance between the starts of chunks

    splitText = text.split("\n")

    // Create list of chunks (chunks are lists of words and whitespace) by using a queue as a rolling window
    for token in splitText:
        if token == "":
            chunkSoFar.put(token)

        else:
            currChunkSize += 1

            if currChunkSize > chunkSize:
                chunkList.append(list(chunkSoFar.queue))

                stripLeadingLines(lineQueue=chunkSoFar, numLines=tillNextChunk)

                currChunkSize -= tillNextChunk

            chunkSoFar.put(token)

    // Making sure the last chunk is of a sufficient proportion
    lastChunk = list(chunkSoFar.queue) // Grab the final (partial) chunk

    if (float(countWords(lastChunk)) / chunkSize) < lastProp: // If the proportion of the last chunk is too low
        if len(chunkList)==0:
            chunkList.extend(lastChunk)
        else: 
            chunkList[-1].extend(lastChunk)
        
    else:
        chunkList.append(lastChunk)

    // Make the list of lists of strings into a list of strings
    countSubList = 0
    stringList=[]
    for subList in chunkList:
        stringList.extend(["".join(subList)])
        if type(subList) is ListType:
            countSubList+=1

    // Handle the case where chunkList contains no sub-lists (its elements are plain strings)
    if countSubList==0:
        stringList = []
        stringList.extend(["".join(chunkList)])


    return stringList

def cutByNumber(text, numChunks):
    
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 25

Instances


Project Name: WheatonCS/Lexos
Commit Name: 4fb4a93987cb9f8aa9475dd8ee1954f5e4f5d935
Time: 2015-06-05
Author: jingxian.liu@gmail.com
File Name: prepare/cutter.py
Class Name:
Method Name: cutByLines


Project Name: WheatonCS/Lexos
Commit Name: 4fb4a93987cb9f8aa9475dd8ee1954f5e4f5d935
Time: 2015-06-05
Author: jingxian.liu@gmail.com
File Name: prepare/cutter.py
Class Name:
Method Name: cutByWords


Project Name: WheatonCS/Lexos
Commit Name: cd562605a9dbb998ac4cef45e2faa152b330f96a
Time: 2015-06-08
Author: jingxian.liu@gmail.com
File Name: prepare/cutter.py
Class Name:
Method Name: cutByCharacters


Project Name: WheatonCS/Lexos
Commit Name: 4fb4a93987cb9f8aa9475dd8ee1954f5e4f5d935
Time: 2015-06-05
Author: jingxian.liu@gmail.com
File Name: prepare/cutter.py
Class Name:
Method Name: cutByLines