package com.microsoft.mobile.polymer.datamodel.ml;

import com.microsoft.mobile.polymer.datamodel.ml.common.TextFeatureSet;
import com.microsoft.mobile.polymer.datamodel.ml.common.WordToken;
import com.microsoft.office.lensactivitycore.utils.CommonUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes2.dex */
public class MessageTokenizer {
    private static final String LOG_TAG = "MessageTokenizer";
    private static final int MAX_MESSAGE_LENGTH = 1000;
    private static final int MAX_WORD_LENGTH = 25;
    private static final String STOPWORD_FILENAME = "stop_words.txt";
    private HashSet<String> stopWords;
    private Pattern alphaNumericMatcherRegex = Pattern.compile("[A-Za-z0-9]+");
    private Pattern urlMatcherRegex = Pattern.compile("(https?:|file:|ftp:|telnet:|www\\.).*?(\\\\s|$)");

    public MessageTokenizer() {
        loadStopWords();
    }

    /* JADX WARN: Removed duplicated region for block: B:24:0x0050 A[EXC_TOP_SPLITTER, SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    private void loadStopWords() {
        /*
            r5 = this;
            r0 = 0
            android.content.Context r1 = com.microsoft.mobile.polymer.util.ContextHolder.getAppContext()     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            java.io.BufferedReader r2 = new java.io.BufferedReader     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            java.io.InputStreamReader r3 = new java.io.InputStreamReader     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            android.content.res.AssetManager r1 = r1.getAssets()     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            java.lang.String r4 = "stop_words.txt"
            java.io.InputStream r1 = r1.open(r4)     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            java.lang.String r4 = "UTF-8"
            r3.<init>(r1, r4)     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            r2.<init>(r3)     // Catch: java.lang.Throwable -> L3c java.io.IOException -> L40
            java.util.HashSet r1 = new java.util.HashSet     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            r1.<init>()     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            r5.stopWords = r1     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
        L22:
            java.lang.String r1 = r2.readLine()     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            if (r1 == 0) goto L36
            java.util.HashSet<java.lang.String> r3 = r5.stopWords     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            java.lang.String r1 = r1.trim()     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            java.lang.String r1 = r1.toLowerCase()     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            r3.add(r1)     // Catch: java.io.IOException -> L3a java.lang.Throwable -> L4d
            goto L22
        L36:
            r2.close()     // Catch: java.io.IOException -> L4c
            goto L4c
        L3a:
            r1 = move-exception
            goto L42
        L3c:
            r1 = move-exception
            r2 = r0
            r0 = r1
            goto L4e
        L40:
            r1 = move-exception
            r2 = r0
        L42:
            java.lang.String r3 = "MessageTokenizer"
            com.microsoft.mobile.polymer.util.CommonUtils.RecordOrThrowException(r3, r1)     // Catch: java.lang.Throwable -> L4d
            r5.stopWords = r0     // Catch: java.lang.Throwable -> L4d
            if (r2 == 0) goto L4c
            goto L36
        L4c:
            return
        L4d:
            r0 = move-exception
        L4e:
            if (r2 == 0) goto L53
            r2.close()     // Catch: java.io.IOException -> L53
        L53:
            throw r0
        */
        throw new UnsupportedOperationException("Method not decompiled: com.microsoft.mobile.polymer.datamodel.ml.MessageTokenizer.loadStopWords():void");
    }

    public String cleanUrlsFromMessage(String str) {
        if (str != null) {
            return this.urlMatcherRegex.matcher(str).replaceAll(CommonUtils.SINGLE_SPACE);
        }
        return null;
    }

    public TextFeatureSet tokenizeAndClean(String str) {
        TextFeatureSet textFeatureSet = new TextFeatureSet();
        ArrayList arrayList = new ArrayList();
        if (str != null && str.length() != 0) {
            if (str.length() > 1000) {
                str = str.substring(0, 1000);
            }
            Matcher matcher = this.alphaNumericMatcherRegex.matcher(cleanUrlsFromMessage(str.replace("'", "")));
            while (matcher.find()) {
                String group = matcher.group();
                if (group.length() != 0 && group.length() <= 25) {
                    String lowerCase = group.toLowerCase();
                    WordToken wordToken = new WordToken(lowerCase);
                    if (this.stopWords != null && this.stopWords.contains(lowerCase)) {
                        wordToken.setIsStopWord(true);
                    }
                    arrayList.add(wordToken);
                }
            }
            textFeatureSet.words = arrayList;
        }
        return textFeatureSet;
    }
}
