【无标题】人工智能+单片机
提示：文章写完后，目录可以自动生成，如何生成可参考右边的帮助文档

前言

提示：这里可以添加本文要记录的大概内容：例如：随着人工智能的不断发展，机器学习这门技术也越来越重要，很多人都开启了学习机器学习，本文就介绍了机器学习的基础内容。

提示：以下是本篇文章正文内容，下面案例可供参考

java代码

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.BufferedReader;
import java.io.FileReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

public class LoadRMBRecords {
    public static void main(String[] args) {
        // 1. 初始化 HBase 配置（若集群已配置 hbase-site.xml，可省略手动 set）
        Configuration conf = HBaseConfiguration.create();
        // conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,node3:2181");

        String tableName = "identify_rmb_records";
        String family = "op_www";
        String filePath = "stumer_in_out_details.txt"; // 请替换为实际文件路径

        // 2. 建立连接并加载数据
        try (Connection conn = ConnectionFactory.createConnection(conf)) {
            Table table = conn.getTable(TableName.valueOf(tableName));
            List<Put> batchPuts = new ArrayList<>();
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm");
            String line;
            int lineCount = 0;

            try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
                while ((line = br.readLine()) != null) {
                    if (line.trim().isEmpty()) continue;
                    String[] fields = line.split(",");
                    if (fields.length < 4) continue; // 跳过格式异常行

                    String rowkey = fields[0].trim();  // 冠字号
                    String exist = fields[1].trim();   // exist
                    String timeStr = fields[2].trim(); // 时间戳/时间字符串
                    String bank = fields[3].trim();    // 银行编号
                    String uid = (fields.length > 4) ? fields[4].trim() : ""; // uid

                    // 将时间统一转为 long 型毫秒数（适配 HBase Put 的时间戳参数）
                    long ts;
                    try {
                        ts = Long.parseLong(timeStr);
                    } catch (NumberFormatException e) {
                        ts = sdf.parse(timeStr).getTime();
                    }

                    // 3. 构造 Put 对象（rowkey + 时间戳）
                    // NOTE(review): quotes were lost in extraction; the column qualifiers are
                    // assumed to be the literals "exist" / "uid" / "bank" — confirm against the original.
                    Put put = new Put(Bytes.toBytes(rowkey), ts);
                    put.addColumn(Bytes.toBytes(family), Bytes.toBytes("exist"), Bytes.toBytes(exist));
                    put.addColumn(Bytes.toBytes(family), Bytes.toBytes("uid"), Bytes.toBytes(uid));
                    put.addColumn(Bytes.toBytes(family), Bytes.toBytes("bank"), Bytes.toBytes(bank));
                    batchPuts.add(put);
                    lineCount++;

                    // 4. 批量提交（每1000条提交一次，提升性能）
                    if (batchPuts.size() >= 1000) {
                        table.put(batchPuts);
                        System.out.println("已批量写入 1000 条记录...");
                        batchPuts.clear();
                    }
                }
                // 提交剩余数据
                if (!batchPuts.isEmpty()) {
                    table.put(batchPuts);
                }
            }
            System.out.println("数据加载完成，共处理 " + lineCount + " 条记录。");
            table.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```

第一题

#1

```bash
tar -zxf /data/hbase-2.4.11-bin.tar.gz -C /usr/local
ll /usr/local
```

#2

进入目录：

```bash
cd /usr/local/hbase-2.4.11/conf
```

修改hbase-site.xml文件，内容如下：

```xml
<!-- 指定HBase运行模式，false：单机，true：分布式 -->
<property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>
<property>
    <name>hbase.tmp.dir</name>
    <value>./tmp</value>
</property>
<!-- 不检查流能力 -->
<property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
</property>
<!-- hbase持久化目录 -->
<property>
    <name>hbase.rootdir</name>
    <value>hdfs://master:8020/hbase</value>
</property>
<!-- 指定HBase Master -->
<property>
    <name>hbase.master</name>
    <value>master</value>
</property>
<!-- zookeeper客户端连接端口 -->
<property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
</property>
<!-- zookeeper集群地址列表，逗号分隔 -->
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1,slave2</value>
</property>
<!-- zookeeper会话最大超时时间 -->
<property>
    <name>zookeeper.session.timeout</name>
    <value>60000000</value>
</property>
<!-- HDFS是否允许追加文件 -->
<property>
    <name>dfs.support.append</name>
    <value>true</value>
</property>
```

#3

在文件末尾添加内容（可以通过运行指令获得结果，或者说明hadoop和java的安装目录）：

```bash
# 解决hadoop和hbase的jar冲突问题，这个就是让hbase不扫描hadoop的jar包。
export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
export HBASE_CLASSPATH=/usr/local/hadoop-3.1.4/etc/hadoop
export JAVA_HOME=/usr/java/jdk1.8.0_281-amd64
# 使用我们独立部署的zookeeper
export HBASE_MANAGES_ZK=false
```

#4

```
master
slave1
slave2
```

#5

```bash
scp -r /usr/local/hbase-2.4.11/ slave1:/usr/local/
scp -r /usr/local/hbase-2.4.11/ slave2:/usr/local/
ssh slave1
ll /usr/local/
exit
ssh slave2
ll /usr/local/
exit
```

#6

```bash
vi /etc/profile
export HBASE_HOME=/usr/local/hbase-2.4.11
export PATH=$PATH:$HBASE_HOME/bin
source /etc/profile
```

#7

```bash
cd /usr/local/hadoop-3.1.4/sbin/
./start-all.sh
# 启动各节点Zookeeper
/usr/local/apache-zookeeper-3.6.3-bin/bin/zkServer.sh start
# 查看各个子节点的zookeeper是否启动
/usr/local/apache-zookeeper-3.6.3-bin/bin/zkServer.sh status
# 确保启动了zookeeper和Hadoop集群，进入目录
cd /usr/local/hbase-2.4.11/bin/
# 运行
./start-hbase.sh
```

进入hbase shell，输入命令查看HBase已有的命名空间，对命令的执行结果进行截图。

```bash
hbase shell
list_namespace
```

#8

```bash
hdfs dfs -put /data/Policy_Holder.csv /user/root/
# NOTE(review): '/usr/root/' below may be a typo for '/user/root/' — confirm before running.
hdfs dfs -put /data/Claims.csv /usr/root/
```

#9

```ruby
create 'policyholder',{NAME=>'info',VERSIONS=>'5'}
create 'claims',{NAME=>'puserinfo'},{NAME=>'claim'},SPLITS=>['SP3154','SP6038']
```

#10

将以下内容添加至新建的工程的pom.xml文件中。

```xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.1.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.1.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.1.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>3.1.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-mapreduce</artifactId>
    <version>2.4.11</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>2.4.11</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>2.4.11</version>
    <!-- NOTE(review): XML tags were lost in extraction; org.glassfish:javax.el carries no
         version in the text, so it is assumed to be an exclusion of hbase-server — confirm. -->
    <exclusions>
        <exclusion>
            <groupId>org.glassfish</groupId>
            <artifactId>javax.el</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.10</version>
    <scope>test</scope>
</dependency>
```

新建LoadRMBRecords.java文件并编程运行。代码文件内容参考LoadRMBRecords.java文件。

#11

```ruby
count 'claims'
count 'policyholder'
```

#12

```ruby
put 'policyholder', '17500013511', 'info:programcode', '伤残险'
get 'policyholder', '17500013511',{COLUMN => 'info', VERSIONS => 5,FORMATTER=>'toString'}
#b
scan 'claims', FILTER=>"ValueFilter(=,'binary:10500010200')"
#c
deleteall 'claims', 'SP0011'
```

2.第二题

```python
import pandas as pd
# (1)
data = pd.read_csv('/data/brain_stroke_Jan.csv', encoding='gbk')
print(data.shape)
# (2)
print(data.isna().sum())
data = data.dropna(axis=0, subset=['Age'])
data['Occupation'] = data['Occupation'].fillna(9)
print(data.isna().sum())
print(data.shape)
# (3)
print(set(data['Occupation'].values))
for i in ['1、', '3、']:
    ind = data['Occupation'] == i
    data.loc[ind, 'Occupation'] = i[0]
data = data.loc[data['Occupation'] != '11', :]
data['Occupation'] = data['Occupation'].astype(int)
print(set(data['Occupation'].values))
# (4)
for col in ['Time of incidence', 'Report time']:
    ind = data[col].apply(lambda x: '-' in x)
    data.loc[ind, col] = pd.to_datetime(data.loc[ind, col], format='%d-%m-%Y')
    data[col] = pd.to_datetime(data[col])
print(data.dtypes)
# (5)
data.to_csv('Answer2.csv', index=None, encoding='gbk')
```

##第三题

数据处理

```python
data = pd.read_csv('/data/brain_stroke_total.csv', encoding='gbk')
print(data.shape)
print(data.isna().sum())
data = data.dropna(axis=0, subset=['Age', 'Report time', 'Time of incidence'])
data['Occupation'] = data['Occupation'].fillna(9)
data = data.loc[data['Age'] != '#REF!', :]
data['Age'] = data['Age'].astype(int)
# NOTE(review): the comparison operator was lost in extraction; '<' is assumed — confirm.
data = data.loc[data['Age'] < 111, :]
print(data.isna().sum())
print(data.shape)
print(set(data['Occupation'].values))
for i in ['1、', '3、']:
    ind = data['Occupation'] == i
    data.loc[ind, 'Occupation'] = i[0]
data = data.loc[data['Occupation'].apply(lambda x: x not in ['11', '18', '37']), :]
data['Occupation'] = data['Occupation'].astype(int)
print(set(data['Occupation'].values))
for col in ['Time of incidence', 'Report time']:
    ind = data[col].apply(lambda x: '-' in x)
    data.loc[ind, col] = pd.to_datetime(data.loc[ind, col], format='%d-%m-%Y')
    data[col] = pd.to_datetime(data[col])
print(data.dtypes)
```

```python
##1
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei']
plt.rcParams['axes.unicode_minus'] = False  # 解决保存图像负号'-'显示为方块的问题
id2lab = {i+1: j for i, j in enumerate('农民、工人、退休人员、教师、渔民、医务人员、职工、离退人员、其他'.split('、'))}
data['Occupation'].map(id2lab).value_counts(ascending=True).plot.barh()
plt.title('脑卒中患者职业分布情况')
plt.xlabel('病发数')
plt.ylabel('职业')
plt.show()
```

```python
#2各个年龄段与职业
a = pd.cut(data['Age'], bins=[0,18,40, 50, 60, 70, 80, 90, 100, max(data['Age'])],
           labels="少年儿童-青年-40岁-50岁-60岁-70岁-80岁-90岁-超高龄".split('-'))
pd.crosstab(data['Sex'].map({1:'男性', 2:'女性'}), a).T.plot.bar(stacked=False)
plt.xticks(rotation=0)
plt.title('不同性别脑卒中患者年龄段分布情况')
plt.ylabel('病发数')
plt.xlabel('年龄段')
plt.show()
```

```python
##3各个月份发病情况
data['Time of incidence'].dt.month.value_counts().sort_index().plot.line()
plt.title('各个月份脑卒中并发情况')
plt.ylabel('病发数')
plt.xlabel('月份')
plt.xticks(range(1, 13))
plt.grid()
plt.show()
```

```python
##4
from tkinter import _flatten
weather = pd.read_csv('/data/weatherdata_total.csv', encoding='gbk', index_col=0,)
a = _flatten([[f'{i}月']*3 for i in range(1, 13)])
weather.columns = pd.MultiIndex.from_tuples([(i, j) for i, j in zip(a, weather.iloc[0, :])])
weather = weather.drop('Date', axis=0)
weather = weather.unstack().reset_index().dropna()
weather.columns = ['month', 'type', 'day', 'values']
weather['date'] = weather.apply(lambda x:f'2023年{x["month"]}{x["day"]}日', axis=1)
weather['date'] = pd.to_datetime(weather['date'], format='%Y年%m月%d日')
weather = weather.drop(['month', 'day'], axis=1)
weather = pd.pivot_table(weather, values='values', index='date', columns='type', aggfunc=sum)
weather['temp_diff'] = weather['High temp'].astype(float) - weather['Low temp'].astype(float)
weather['Aver RH'] = weather['Aver RH'].astype(float)
weather.head(5)
```

```python
##5
import matplotlib.pyplot as plt
import numpy as np
tmp = data.set_index('Time of incidence')
tmp['values'] = 1
num = tmp.resample('1D')['values'].sum()
num
# 生成示例数据
x = weather.index
y1 = weather['temp_diff']
y2 = num
fig, ax1 = plt.subplots()
# 在第一个轴上绘制第一组数据
ax1.plot(x, y1, label='温差')
ax1.plot(x, weather['High temp'].astype(float), label='最高温')
ax1.plot(x, weather['Low temp'].astype(float), label='最低温')
ax1.set_ylabel('温度')
ax1.set_xlabel('日期')
ax1.set_title('脑卒中发病情况与温差对比图')
# 创建第二个y轴的轴对象
ax2 = ax1.twinx()
# 在第二个轴上绘制第二组数据
ax2.plot(x, y2, label='病发数', c='r')
ax2.set_ylabel('病发数')
# 添加图例
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
# 显示图表
plt.show()
```

##第四题

```python
##1
import pandas as pd
data = pd.read_csv('/data/brain_stroke.csv', encoding='gbk')
print(data.shape)

##2
# NOTE(review): the comparison operator was lost in extraction; '>' is assumed — confirm.
ind = data['age'] > 35
df = data.loc[ind, :]
df['gender'] = df['gender'].map({1:'Male', 2: 'Female'})
categorical = df.select_dtypes(include='object')
df = pd.get_dummies(df, columns=[col for col in df.columns if col in categorical])
df.head()

##3
y = df.stroke
x = df.drop('stroke', axis=1)
from imblearn.over_sampling import RandomOverSampler
oversample = RandomOverSampler(sampling_strategy='minority', random_state=0)
x, y = oversample.fit_resample(x, y)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

##4
from sklearn.preprocessing import MinMaxScaler
ss = MinMaxScaler()
x_train_ss = ss.fit_transform(x_train)
x_train_ss[:5]

##5
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
dtc = DecisionTreeClassifier()
etc = ExtraTreeClassifier()
dtc.fit(x_train_ss, y_train)
etc.fit(x_train_ss, y_train)

##6
x_test_ss = ss.transform(x_test)
y_pred_dtc = dtc.predict(x_test_ss)
y_pred_etc = etc.predict(x_test_ss)

##7
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
print('y_pred_dtc')
print('Accuracy:', round(accuracy_score(y_test, y_pred_dtc), 2))
print('F1 Score:', round(f1_score(y_test, y_pred_dtc), 2))
print('\ny_pred_etc')
print('Accuracy:', round(accuracy_score(y_test, y_pred_etc), 2))
print('F1 Score:', round(f1_score(y_test, y_pred_etc), 2))
```