Wednesday, August 12, 2015

Apache ZooKeeper: monitoring ZooKeeper using the four-letter-word commands

Here are a few four-letter-word commands you can send to ZooKeeper to check its stats and status.

[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo ruok | nc `hostname` 5181
imok[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo stat | nc `hostname` 5181
Zookeeper version: 3.4.5-mapr-1406--1, built on 01/19/2015 22:57 GMT
Clients:
 /10.0.0.251:57652[0](queued=0,recved=1,sent=0)
 /10.0.0.251:57623[1](queued=0,recved=9151,sent=9158)
 /10.0.0.251:57643[1](queued=0,recved=9120,sent=9125)
 /10.0.0.251:57622[1](queued=0,recved=9156,sent=9164)
 /10.0.0.251:57624[1](queued=0,recved=12974,sent=12974)
 /10.0.0.251:57620[1](queued=0,recved=9150,sent=9157)
 /10.0.0.251:57644[1](queued=0,recved=7708,sent=7966)
 /10.0.0.251:57626[1](queued=0,recved=9195,sent=9206)
 /10.0.0.251:57619[1](queued=0,recved=9185,sent=9186)
 /10.0.0.251:57621[1](queued=0,recved=9143,sent=9149)

Latency min/avg/max: 0/0/63
Received: 85043
Sent: 85345
Connections: 10
Outstanding: 0
Zxid: 0x136
Mode: standalone
Node count: 59
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo conf | nc `hostname` 5181
clientPort=5181
dataDir=/opt/mapr/zkdata/version-2
dataLogDir=/opt/mapr/zkdata/version-2
tickTime=2000
maxClientCnxns=100
minSessionTimeout=4000
maxSessionTimeout=40000
serverId=0
[root@ip-10-0-0-251 ~]# echo cons | nc `hostname` 5181
 /10.0.0.251:57623[1](queued=0,recved=9162,sent=9169,sid=0x14f1efc29d70004,lop=PING,est=1439333921242,to=30000,lcxid=0x1d,lzxid=0x136,lresp=1439425306327,llat=0,minlat=0,avglat=0,maxlat=2)
 /10.0.0.251:57643[1](queued=0,recved=9131,sent=9136,sid=0x14f1efc29d70008,lop=PING,est=1439334130760,to=30000,lcxid=0x13,lzxid=0x136,lresp=1439425306958,llat=0,minlat=0,avglat=0,maxlat=2)
 /10.0.0.251:57654[0](queued=0,recved=1,sent=0)
 /10.0.0.251:57622[1](queued=0,recved=9166,sent=9174,sid=0x14f1efc29d70003,lop=PING,est=1439333921221,to=30000,lcxid=0x22,lzxid=0x136,lresp=1439425302064,llat=0,minlat=0,avglat=0,maxlat=5)
 /10.0.0.251:57624[1](queued=0,recved=12989,sent=12989,sid=0x14f1efc29d70005,lop=GETC,est=1439333936044,to=30000,lcxid=0x1aef,lzxid=0x136,lresp=1439425303437,llat=0,minlat=0,avglat=0,maxlat=6)
 /10.0.0.251:57620[1](queued=0,recved=9161,sent=9168,sid=0x14f1efc29d70001,lop=PING,est=1439333921154,to=30000,lcxid=0x1d,lzxid=0x136,lresp=1439425307676,llat=0,minlat=0,avglat=0,maxlat=4)
 /10.0.0.251:57644[1](queued=0,recved=7716,sent=7974,sid=0x14f1efc29d70009,lop=PING,est=1439334142657,to=40000,lcxid=0x394,lzxid=0x136,lresp=1439425303832,llat=0,minlat=0,avglat=0,maxlat=16)
 /10.0.0.251:57626[1](queued=0,recved=9206,sent=9217,sid=0x14f1efc29d70007,lop=PING,est=1439333966309,to=30000,lcxid=0x4f,lzxid=0x136,lresp=1439425306675,llat=0,minlat=0,avglat=0,maxlat=6)
 /10.0.0.251:57619[1](queued=0,recved=9196,sent=9197,sid=0x14f1efc29d70000,lop=PING,est=1439333920963,to=30000,lcxid=0x41,lzxid=0x136,lresp=1439425304945,llat=0,minlat=0,avglat=0,maxlat=63)
 /10.0.0.251:57621[1](queued=0,recved=9153,sent=9159,sid=0x14f1efc29d70002,lop=PING,est=1439333921194,to=30000,lcxid=0x15,lzxid=0x136,lresp=1439425302145,llat=0,minlat=0,avglat=0,maxlat=6)

[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo envi | nc `hostname` 5181
Environment:
zookeeper.version=3.4.5-mapr-1406--1, built on 01/19/2015 22:57 GMT
host.name=ip-10-0-0-251
java.version=1.7.0_45
java.vendor=Oracle Corporation
java.home=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre
java.class.path=/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../build/classes:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../build/lib/*.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/slf4j-log4j12-1.6.1.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/slf4j-api-1.6.1.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/netty-3.2.2.Final.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/log4j-1.2.15.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/jline-0.9.94.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../zookeeper-3.4.5-mapr-1406.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../src/java/lib/*.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/conf::/opt/mapr/lib/maprfs-4.0.2-mapr.jar:/opt/mapr/lib/protobuf-java-2.5.0.jar:/opt/mapr/lib/libprotodefs-4.0.2-mapr.jar:/opt/mapr/lib/baseutils-4.0.2-mapr.jar:/opt/mapr/lib/json-20080701.jar:/opt/mapr/lib/flexjson-2.1.jar:/opt/mapr/lib/commons-codec-1.5.jar
java.library.path=/opt/mapr/lib
java.io.tmpdir=/tmp
java.compiler=
os.name=Linux
os.arch=amd64
os.version=2.6.32-431.el6.x86_64
user.name=mapr
user.home=/home/mapr
user.dir=/
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo wchc | nc `hostname` 5181
0x14f1efc29d70004
 /services/kvstore/ip-10-0-0-251
 /services/hoststats/ip-10-0-0-251
 /services/hoststats/master
 /nodes/ip-10-0-0-251/services/hoststats
0x14f1efc29d70008
 /nodes/ip-10-0-0-251/services/drill-bits
 /services/drill-bits/ip-10-0-0-251
 /services/drill-bits/master
0x14f1efc29d70003
 /nodes/ip-10-0-0-251/services/cldb
 /services/kvstore/ip-10-0-0-251
 /services/cldb/ip-10-0-0-251
 /datacenter/controlnodes/cldb/active/CLDBRunningMaster
 /services/cldb/master
0x14f1efc29d70005
 /datacenter/controlnodes/cldb/active/CLDBMaster
0x14f1efc29d70009
 /drill/sys.storage_plugins/hive
 /drill/sys.storage_plugins/cp
 /drill/sys.storage_plugins/mongo
 /drill/MyDrillCluster-402-drillbits/9d62d3ba-f9fa-4ad4-9b9c-684b854d41fe
 /drill/sys.storage_plugins/hbase
 /drill/sys.storage_plugins/dfs
0x14f1efc29d70001
 /services/webserver/master
 /nodes/ip-10-0-0-251/services/webserver
 /services/webserver/ip-10-0-0-251
 /services/cldb/master
0x14f1efc29d70007
 /services/drill-bits/master
 /services_config/kvstore/ip-10-0-0-251
 /services/kvstore/ip-10-0-0-251
 /services/services/drill-bits
 /services/services/webserver
 /services_config/drill-bits/ip-10-0-0-251
 /services/cldb/master
 /services/webserver/master
 /services/drill-bits/ip-10-0-0-251
 /services_config/cldb/ip-10-0-0-251
 /datacenter/controlnodes/cldb/active/CLDBMaster
 /services/services/cldb
 /services/services/kvstore
 /services/hoststats/ip-10-0-0-251
 /services/services/hoststats
 /services/cldb/ip-10-0-0-251
 /services_config/hoststats/ip-10-0-0-251
 /services/hoststats/master
 /services/webserver/ip-10-0-0-251
 /services/kvstore/master
 /services_config/webserver/ip-10-0-0-251
0x14f1efc29d70000
 /nodes/ip-10-0-0-251/services/nfs/start
 /nodes/ip-10-0-0-251/services/hbmaster/start
 /nodes/ip-10-0-0-251/services/historyserver/stop
 /nodes/ip-10-0-0-251/sdump
 /nodes/ip-10-0-0-251/services/hbmaster/stop
 /nodes/ip-10-0-0-251/services/nfs/stop
 /nodes/ip-10-0-0-251/services/tasktracker/stop
 /nodes/ip-10-0-0-251/services/fileserver/stop
 /nodes/ip-10-0-0-251/services/resourcemanager/stop
 /nodes/ip-10-0-0-251/services/fileserver/start
 /servers/ip-10-0-0-251
 /nodes/ip-10-0-0-251/services/jobtracker/stop
 /nodes/ip-10-0-0-251/services/nodemanager/start
 /nodes/ip-10-0-0-251/services/nodemanager/stop
 /nodes/ip-10-0-0-251/services/hbregionserver/stop
 /nodes/ip-10-0-0-251/services/historyserver/start
 /nodes/ip-10-0-0-251/services/jobtracker/start
 /nodes/ip-10-0-0-251/services/tasktracker/start
 /datacenter/controlnodes/cldb/active/CLDBRunningMaster
 /nodes/ip-10-0-0-251/services/hbregionserver/start
 /nodes/ip-10-0-0-251/stop
 /nodes/ip-10-0-0-251/services/resourcemanager/start
0x14f1efc29d70002
 /datacenter/license/m7/enabled
 /services/kvstore/ip-10-0-0-251
 /nodes/ip-10-0-0-251/services/kvstore
 /services/kvstore/master


Zookeeper : create,list,delete zknodes using java

A quick and dirty, but working, way to create, list, and delete znodes using Java.


Monday, August 10, 2015

Quick and dirty way of Accessing Apache Drill using Drill JDBC connectivity

This is a quick and dirty way of accessing Apache Drill using the Drill JDBC driver. Make sure you have the Drill JDBC driver (drill-jdbc-all-1.1.0.jar) on the classpath while running your Java program.
Java Class:

package com.test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;


/**
 * Minimal Drill JDBC example: connects to Drill through ZooKeeper, runs one
 * query and prints the first column of every returned row to stdout.
 */
public class DrillJDBCTest {

    /**
     * Loads the Drill JDBC driver, runs the query and prints column 1 of each row.
     *
     * @param args unused
     * @throws Exception if the driver class cannot be loaded or any JDBC call fails
     */
    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.drill.jdbc.Driver");

        // try-with-resources closes the ResultSet, Statement and Connection
        // (in that order) even when the query or the row iteration throws.
        try (Connection connection = DriverManager.getConnection("jdbc:drill:zk=node3.mapr.com:5181/drill/my_cluster_com-drillbits");
             Statement st = connection.createStatement();
             ResultSet rs = st.executeQuery("SELECT * from cp.`employee`")) {
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        }
    }

}

After running generated class file you will see the following output
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 printing  full name of the employee  
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
Sheri Nowmer
Derrick Whelply
Michael Spence
Maya Gutierrez
Roberta Damstra
DONE


Writing a parquet file using Hadoop mapreduce job

This is a simple example to write a parquet file using the hadoop mapreduce job.
Env: Java 7,Maven 3.0.1,hadoop1

Step 1: Create a simple java project and add the repository information and dependencies in the pom.xml

<!-- Place both sections directly inside the <project> element of pom.xml. -->

<!-- Snapshot repository that hosts the parquet 1.1.2-SNAPSHOT artifacts. -->
<repositories>
  <repository>
    <id>sonatype-nexus-snapshots</id>
    <url>https://oss.sonatype.org/content/repositories/snapshots</url>
    <releases>
      <enabled>false</enabled>
    </releases>
    <snapshots>
      <enabled>true</enabled>
    </snapshots>
  </repository>
</repositories>

<dependencies>
  <!-- Parquet modules -->
  <dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-common</artifactId>
    <version>1.1.2-SNAPSHOT</version>
  </dependency>
  <dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-encoding</artifactId>
    <version>1.1.2-SNAPSHOT</version>
  </dependency>
  <dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-column</artifactId>
    <version>1.1.2-SNAPSHOT</version>
  </dependency>
  <dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-hadoop</artifactId>
    <version>1.1.2-SNAPSHOT</version>
  </dependency>

  <!-- Provides the org.apache.hadoop.hive.ql.io.parquet classes imported by the mapper and the driver. -->
  <dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>0.13.0</version>
  </dependency>
</dependencies>

Step 2: Write a Mapper implementation
package com.test;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import parquet.io.api.Binary;
/**
 * Map-only task that turns each tab-separated text line into a two-element
 * {@link ArrayWritable} of {@link BinaryWritable} values and writes it with a
 * null key.
 */
public class ParquetMapper extends Mapper<LongWritable, Text, Void, ArrayWritable> {

    /** Number of leading tab-separated fields copied from every input line. */
    private static final int FIELD_COUNT = 2;

    /**
     * Runs the default setup and reads the schema from the job configuration.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        // NOTE(review): the returned schema is discarded; presumably this call is
        // here so a missing or invalid schema surfaces at task start - confirm.
        DataWritableWriteSupport.getSchema(context.getConfiguration());
    }

    /**
     * Converts one input line into an output record.
     *
     * @param n       input key of the line (used only in the error message)
     * @param line    tab-separated input line; null or empty lines are skipped
     * @param context task context the record is written to
     * @throws IOException if the line has fewer than {@code FIELD_COUNT} fields,
     *                     or if writing the record fails
     */
    @Override
    public void map(LongWritable n, Text line, Context context) throws IOException, InterruptedException {
        if (line == null || line.getLength() == 0) {
            return;
        }

        String[] parts = line.toString().split("\t");
        // Fail with a descriptive error instead of a bare
        // ArrayIndexOutOfBoundsException when a line is short.
        if (parts.length < FIELD_COUNT) {
            throw new IOException("Malformed input record (key " + n + "): expected at least "
                    + FIELD_COUNT + " tab-separated fields but found " + parts.length);
        }

        Writable[] data = new Writable[FIELD_COUNT];
        for (int i = 0; i < FIELD_COUNT; i++) {
            data[i] = new BinaryWritable(Binary.fromString(parts[i]));
        }
        context.write(null, new ArrayWritable(Writable.class, data));
    }
}

Step 3: finally implement the job driver class as follows
package com.test;
import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static parquet.schema.Type.Repetition.OPTIONAL;
import static parquet.schema.Type.Repetition.REQUIRED;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import parquet.hadoop.ParquetOutputFormat;
import parquet.schema.MessageType;
import parquet.schema.PrimitiveType;

/**
 * Job driver: configures and submits a map-only job that reads text input and
 * writes it through {@link ParquetOutputFormat} using {@link ParquetMapper}.
 *
 * <p>Usage: {@code App <input path> <output path>}
 */
@SuppressWarnings("deprecation")
public class App extends Configured implements Tool {

    /**
     * Builds and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     * @throws Exception if job setup or submission fails
     */
    public int run(String[] args) throws Exception {
        // Without this check a missing argument surfaces as a bare
        // ArrayIndexOutOfBoundsException with no hint about the expected usage.
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + getClass().getName() + " <input path> <output path>");
            return 2;
        }

        Path inpath = new Path(args[0]);
        Path outpath = new Path(args[1]);

        Configuration conf = getConf();

        conf.set(ParquetOutputFormat.BLOCK_SIZE, Integer.toString(128 * 1024 * 1024));
        conf.set(ParquetOutputFormat.COMPRESSION, "SNAPPY");

        // Two binary columns: a required "empname" and an optional "designation".
        MessageType messagetype = new MessageType("employee",
                new PrimitiveType(REQUIRED, BINARY, "empname"),
                new PrimitiveType(OPTIONAL, BINARY, "designation"));

        // The schema must be stored in conf before the Job is created from it.
        DataWritableWriteSupport.setSchema(messagetype, conf);

        System.out.println("Schema: " + messagetype.toString());

        Job job = new Job(conf, "parquet-convert");

        job.setJarByClass(getClass());
        // Replaces the "parquet-convert" name given to the constructor above.
        job.setJobName(getClass().getName());
        job.setMapOutputKeyClass(Void.class);
        job.setMapOutputValueClass(ArrayWritable.class);
        job.setOutputKeyClass(Void.class);
        job.setOutputValueClass(ArrayWritable.class);
        job.setMapperClass(ParquetMapper.class);
        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(ParquetOutputFormat.class);

        FileInputFormat.setInputPaths(job, inpath);
        ParquetOutputFormat.setOutputPath(job, outpath);

        ParquetOutputFormat.setWriteSupportClass(job, DataWritableWriteSupport.class);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

    /**
     * Entry point: runs the tool through {@link ToolRunner} and exits with its return code.
     */
    public static void main(String[] args) throws Exception {
        int returnCode = ToolRunner.run(new Configuration(), new App(), args);
        System.exit(returnCode);
    }

}

Now we are good to go: build the project using Maven and run your job on the Hadoop cluster. Make sure you give the map tasks enough Java heap to avoid an OutOfMemoryError (OOM).

Reading parquet files using parquet-tools


Saturday, August 8, 2015

Apache Drill : Creating Simple UDF

In this example I will demonstrate how to create a simple custom UDF in Apache Drill.
Env : Apache Drill 1.1.0
Java : Jdk-1.7_75

The objective of the function is to take a string value from each row as input and return its length. In this example we pass the employee's full name as input and return the length of that name.

Step 1 : Create a simple java project using maven and add the drill-1.1.0 dependencies in the pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>com.mapr.simplefn.test</groupId>
  <artifactId>my-simple-drill-fn</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>my-simple-drill-fn</name>
  <url>http://maven.apache.org</url>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- Drill classes used by the UDF (DrillSimpleFunc, annotations, holders). -->
    <dependency>
      <groupId>org.apache.drill.exec</groupId>
      <artifactId>drill-java-exec</artifactId>
      <version>1.1.0</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Attaches a sources jar during the package phase, next to the regular jar. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <version>2.4</version>
        <executions>
          <execution>
            <id>attach-sources</id>
            <phase>package</phase>
            <goals>
              <goal>jar-no-fork</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

Step 2: Create a java class with the name of EmpNameLength which implements DrillSimpleFunc interface.

package com.mapr.simplefn.test;


import javax.inject.Inject;

import io.netty.buffer.DrillBuf;

import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
import org.apache.drill.exec.expr.holders.VarCharHolder;

/**
 * Drill simple function {@code empnlen}: takes a VARCHAR value and returns the
 * length of the decoded string, rendered as a decimal VARCHAR.
 */
@FunctionTemplate(name="empnlen",scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
public class EmpNameLength implements DrillSimpleFunc{

    /** Input string value. */
    @Param
    NullableVarCharHolder input;

    /** Result holder; points into {@link #buffer} after {@link #eval()}. */
    @Output
    VarCharHolder out;

    /** Buffer that backs the output value. */
    @Inject
    DrillBuf buffer;

    /** No per-query state to initialise. */
    @Override
    public void setup() {
    }

    /**
     * Decodes the input as UTF-8, takes {@code String.length()} of it and writes
     * the decimal representation of that length to the output holder.
     */
    @Override
    public void eval() {
        // Class names are fully qualified on purpose: Drill's function developer
        // guide asks for this inside setup()/eval() because imports are not
        // available to the code generated from these method bodies.
        String stringValue = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);

        // Encode once, with an explicit charset, so the byte count in out.end and
        // the bytes copied into the buffer always agree and do not depend on the
        // JVM's default encoding.
        byte[] outputBytes = String.valueOf(stringValue.length()).getBytes(java.nio.charset.StandardCharsets.UTF_8);

        out.buffer = buffer;
        out.start = 0;
        out.end = outputBytes.length;
        buffer.setBytes(0, outputBytes);
    }

}

Step 3: Add an empty drill-override.conf in the resources folder of the project.
Step 4: Run mvn package, which builds the jars in the target folder; copy the jars to DRILL_HOME/jars/3rdparty/ on each node of the Drill cluster.
Step 5: Restart the Drillbits and run the query as follows.

Expected Result:
0: jdbc:drill:zk=local> select empnlen(full_name) from cp.`employee.json` limit 5;
+---------+
| EXPR$0  |
+---------+
| 12      |
| 15      |
| 14      |
| 14      |
| 15      |
+---------+

Code Snippet to create a table in MapR-Db