Skip to content

Latest commit

 

History

History
265 lines (253 loc) · 7.13 KB

2023-4-15.md

File metadata and controls

265 lines (253 loc) · 7.13 KB

Note

  • Naive Bayes Classifier Basics
  • 回忆条件概率,贝叶斯公式与拉普拉斯平滑相关的内容
  • 熟悉了np.unique的相关操作,返回去重后的结果,也可用return_counts=True返回各数出现的次数
  • 熟悉字典相关操作
  • 学会列表的[:,i]取出第i列的功能,.append()加入新元素的用法
  • 求字典中最大值的用法 max(sample_probs,key=sample_probs.get)
  • np.array()强制转换为np类型
  • Codeforces Round 866 (Div. 2)
  • 回忆string类型s.length(),+用法
  • 回忆最长字串的dp写法
  • 注意边界问题与memset的时间消耗,以及计算时误改变原数组,浪费几个小时
  • 学习mex求法,学习bitset类 bitset<个数>名称 .set()全部设为1 ._Find_first()返回第一个1的位置

Code

贝叶斯+拉普拉斯平滑

import numpy as np

class NaiveBayesClassifier(object):
    def __init__(self):
        '''
        self.label_prob表示每种类别在数据中出现的概率
        例如,{0:0.333, 1:0.667}表示数据中类别0出现的概率为0.333,类别1的概率为0.667
        '''
        self.label_prob = {}
        '''
        self.condition_prob表示每种类别确定的条件下各个特征出现的概率
        例如训练数据集中的特征为 [[2, 1, 1],
                              [1, 2, 2],
                              [2, 2, 2],
                              [2, 1, 2],
                              [1, 2, 3]]
        标签为[1, 0, 1, 0, 1]
        那么当标签为0时第0列的值为1的概率为0.5,值为2的概率为0.5;
        '''
        self.condition_prob = {}
        
    def fit(self, feature, label):
        #********* Begin *********#
        unique_labels,label_cnt=np.unique(label,return_counts=True)
        total_samples=len(label)
        for x in unique_labels:
            self.label_prob[x]=(label_cnt[x]+1)/(total_samples+len(unique_labels))
        unique_features = []
        for i in range(feature.shape[1]):
            unique_features.append(np.unique(feature[:, i]))
        for x in unique_labels:
            self.condition_prob[x] = {}
            for j in range(feature.shape[1]):
                self.condition_prob[x][j] = {}
                for i in range(feature.shape[0]):
                    if feature[i][j] not in self.condition_prob[x][j]:
                        self.condition_prob[x][j][feature[i][j]] = 1
                    else:
                        self.condition_prob[x][j][feature[i][j]] += 1
                num_classes = len(self.condition_prob[x][j])
                total_counts = sum(self.condition_prob[x][j].values())
                for key in self.condition_prob[x][j].keys():
                    self.condition_prob[x][j][key] = (self.condition_prob[x][j][key] + 1) / (total_counts + num_classes)
        #********* End *********#
        
    def predict(self, feature):
        result = []
        # 对每条测试数据都进行预测
        for i, f in enumerate(feature):
            # 可能的类别的概率
            prob = np.zeros(len(self.label_prob.keys()))
            ii = 0
            for label, label_prob in self.label_prob.items():
                # 计算概率
                prob[ii] = label_prob
                for j in range(len(feature[0])):
                    prob[ii] *= self.condition_prob[label][j][f[j]]
                ii += 1
            # 取概率最大的类别作为结果
            result.append(list(self.label_prob.keys())[np.argmax(prob)])
        return np.array(result)

贝叶斯分类调库

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfTransformer

def news_predict(train_sample, train_label, test_sample):
    #********* Begin *********#
    v=CountVectorizer()
    x_train = v.fit_transform(train_sample)
    x_test = v.transform(test_sample)
    
    t=TfidfTransformer()
    x_train_t = t.fit_transform(x_train)
    x_test_t = t.transform(x_test)

    clf=MultinomialNB(alpha=0.01)
    clf.fit(x_train_t,train_label)
    predicted=clf.predict(x_test_t)
    
    return predicted
    #********* End *********#

A

#include <bits/stdc++.h>

using namespace std;

typedef long long ll;

void solve()
{
    string s;
    cin >> s;
    ll ans = 0;
    for (ll i = 0; i < s.length(); i++)
    {
        if (s[i] == '_')
        {
            if (i == 0)
                ans++;
            if (s[i - 1] == '_')
                ans++;
            if (i == s.length() - 1)
                ans++;
        }
    }
    if (s.length() == 1 && s[0] == '^')
        ans++;
    cout << ans << endl;
    return;
}
int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    int tt;
    cin >> tt;
    while (tt--)
        solve();
    return 0;
}

B

#include <bits/stdc++.h>

using namespace std;

typedef long long ll;

void solve()
{
    string s;
    cin >> s;
    string ss;
    ss = s + s;
    ll i = 0;
    ll mx = 0;
    ll len = ss.length();
    ll dp[len];
    memset(dp, 0, sizeof(dp[0]) * len);
    if (ss[0] == '1')
        dp[0] = 1;
    for (ll i = 1; i < len; i++)
    {
        if (s[i] == '1')
            dp[i] = max(dp[i], ll(1));
        if (ss[i] == '1' && ss[i - 1] == '1')
            dp[i] = dp[i - 1] + 1;
    }
    for (ll i = 0; i < len; i++)
    {
        mx = max(mx, dp[i]);
    }
    if (mx != len)
        mx += 1;
    cout << mx / 2 * (mx - mx / 2) << endl;
    return;
}
int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    int tt;
    cin >> tt;
    while (tt--)
        solve();
    return 0;
}

C

#include <bits/stdc++.h>

using namespace std;

typedef long long ll;
ll n;
ll a[200010];
ll mmex(ll v[])
{
    bitset<200010> vis;
    vis.set();
    for (int i = 1; i <= n; i++)
        if (v[i] <= n)
            vis[v[i]] = 0;
    return vis._Find_first();
}

void solve()
{
    cin >> n;
    for (int i = 1; i <= n; i++)
        cin >> a[i];
    ll mex = mmex(a);
    if (!count(a + 1, a + n + 1, mex + 1))
    {
        if (mex != n)
            cout << "Yes\n";
        else
            cout << "No\n";
        return;
    }
    int bg = 0;
    int ed = 0;
    for (int i = 1; i <= n; i++)
    {
        if (a[i] == mex + 1)
        {
            bg = i;
            break;
        }
    }
    for (int i = n; i >= 1; i--)
    {
        if (a[i] == mex + 1)
        {
            ed = i;
            break;
        }
    }
    for (int i = bg; i <= ed; i++)
    {
        a[i] = mex;
    }
    if (mmex(a) == mex + 1)
        cout << "Yes\n";
    else
        cout << "No\n";
    return;
}
int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    int tt;
    cin >> tt;
    while (tt--)
        solve();
    return 0;
}